diff --git a/.gitignore b/.gitignore index f4fb46f2e..73d9179fa 100644 --- a/.gitignore +++ b/.gitignore @@ -3,8 +3,6 @@ *.iws *.ipr *.iml -*.ipr -*.iws *~ .vscode .metals @@ -27,4 +25,4 @@ spark-warehouse /**/job-override.properties /**/*.log /**/.factorypath - +/**/.scalafmt.conf diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 77aa2aedb..db0097d64 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -22,9 +22,20 @@ dnet45-releases https://maven.d4science.org/nexus/content/repositories/dnet45-releases + + DHPSite + ${dhp.site.stage.path}/dhp-build/dhp-code-style + + + + org.apache.maven.wagon + wagon-ssh + 2.10 + + @@ -35,7 +46,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.9.1 @@ -43,6 +54,7 @@ UTF-8 + sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop \ No newline at end of file diff --git a/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf b/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf new file mode 100644 index 000000000..0b5dbe0b4 --- /dev/null +++ b/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf @@ -0,0 +1,21 @@ +style = defaultWithAlign + +align.openParenCallSite = false +align.openParenDefnSite = false +align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}] +continuationIndent.callSite = 2 +continuationIndent.defnSite = 2 +danglingParentheses = true +indentOperator = spray +maxColumn = 120 +newlines.alwaysBeforeTopLevelStatements = true +project.excludeFilters = [".*\\.sbt"] +rewrite.rules = [AvoidInfix] +rewrite.rules = [ExpandImportSelectors] +rewrite.rules = [RedundantBraces] +rewrite.rules = [RedundantParens] +rewrite.rules = [SortImports] +rewrite.rules = [SortModifiers] +rewrite.rules = [PreferCurlyFors] +spaces.inImportCurlyBraces = false +unindentTopLevelOperators = true \ No newline at end of file diff --git a/dhp-build/dhp-code-style/src/site/site.xml 
b/dhp-build/dhp-code-style/src/site/site.xml new file mode 100644 index 000000000..634a2c154 --- /dev/null +++ b/dhp-build/dhp-code-style/src/site/site.xml @@ -0,0 +1,21 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 12b999b9c..97fbdf45b 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -10,6 +10,9 @@ pom This module is a container for the build tools used in dnet-hadoop + + true + dhp-code-style @@ -17,4 +20,12 @@ dhp-build-properties-maven-plugin + + + + DHPSite + ${dhp.site.stage.path}/dhp-build/ + + + diff --git a/dhp-build/src/site/site.xml b/dhp-build/src/site/site.xml new file mode 100644 index 000000000..2d9d769a2 --- /dev/null +++ b/dhp-build/src/site/site.xml @@ -0,0 +1,22 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c057123b1..7b18f0105 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -13,7 +13,51 @@ dhp-common jar + + + DHPSite + ${dhp.site.stage.path}/dhp-common + + + This module contains common utilities meant to be used across the dnet-hadoop submodules + + + + net.alchim31.maven + scala-maven-plugin + ${net.alchim31.maven.version} + + + scala-compile-first + initialize + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + scala-doc + process-resources + + doc + + + + + ${scala.version} + + + + + diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java deleted file mode 100644 index 8ceee5c8a..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ /dev/null @@ -1,413 +0,0 @@ - -package eu.dnetlib.dhp.common; - -import java.io.Serializable; -import java.util.*; -import java.util.stream.Collectors; - -import 
eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.dump.oaf.*; -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; - -public class GraphResultMapper implements Serializable { - - public static Result map( - E in) { - - CommunityResult out = new CommunityResult(); - - eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in; - Optional ort = Optional.ofNullable(input.getResulttype()); - if (ort.isPresent()) { - switch (ort.get().getClassid()) { - case "publication": - Optional journal = Optional - .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal()); - if (journal.isPresent()) { - Journal j = journal.get(); - Container c = new Container(); - c.setConferencedate(j.getConferencedate()); - c.setConferenceplace(j.getConferenceplace()); - c.setEdition(j.getEdition()); - c.setEp(j.getEp()); - c.setIss(j.getIss()); - c.setIssnLinking(j.getIssnLinking()); - c.setIssnOnline(j.getIssnOnline()); - c.setIssnPrinted(j.getIssnPrinted()); - c.setName(j.getName()); - c.setSp(j.getSp()); - c.setVol(j.getVol()); - out.setContainer(c); - out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname()); - } - break; - case "dataset": - eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input; - Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue())); - Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue())); - - out - .setGeolocation( - Optional - .ofNullable(id.getGeolocation()) - .map( - igl -> igl - .stream() - .filter(Objects::nonNull) - .map(gli -> { - GeoLocation gl = new GeoLocation(); - gl.setBox(gli.getBox()); - gl.setPlace(gli.getPlace()); - gl.setPoint(gli.getPoint()); - 
return gl; - }) - .collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname()); - break; - case "software": - - eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input; - Optional - .ofNullable(is.getCodeRepositoryUrl()) - .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue())); - Optional - .ofNullable(is.getDocumentationUrl()) - .ifPresent( - value -> out - .setDocumentationUrl( - value - .stream() - .map(Field::getValue) - .collect(Collectors.toList()))); - - Optional - .ofNullable(is.getProgrammingLanguage()) - .ifPresent(value -> out.setProgrammingLanguage(value.getClassid())); - - out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname()); - break; - case "other": - - eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input; - out - .setContactgroup( - Optional - .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out - .setContactperson( - Optional - .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - out - .setTool( - Optional - .ofNullable(ir.getTool()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); - - break; - } - - Optional - .ofNullable(input.getAuthor()) - .ifPresent( - ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); - - // I do not map Access Right UNKNOWN or OTHER - - Optional oar = Optional.ofNullable(input.getBestaccessright()); - if (oar.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); - out - .setBestaccessright( - AccessRight - 
.newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - } - - final List contributorList = new ArrayList<>(); - Optional - .ofNullable(input.getContributor()) - .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); - out.setContributor(contributorList); - - Optional - .ofNullable(input.getCountry()) - .ifPresent( - value -> out - .setCountry( - value - .stream() - .map( - c -> { - if (c.getClassid().equals((ModelConstants.UNKNOWN))) { - return null; - } - Country country = new Country(); - country.setCode(c.getClassid()); - country.setLabel(c.getClassname()); - Optional - .ofNullable(c.getDataInfo()) - .ifPresent( - provenance -> country - .setProvenance( - Provenance - .newInstance( - provenance - .getProvenanceaction() - .getClassname(), - c.getDataInfo().getTrust()))); - return country; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()))); - - final List coverageList = new ArrayList<>(); - Optional - .ofNullable(input.getCoverage()) - .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); - out.setCoverage(coverageList); - - out.setDateofcollection(input.getDateofcollection()); - - final List descriptionList = new ArrayList<>(); - Optional - .ofNullable(input.getDescription()) - .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); - out.setDescription(descriptionList); - Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); - if (oStr.isPresent()) { - out.setEmbargoenddate(oStr.get().getValue()); - } - - final List formatList = new ArrayList<>(); - Optional - .ofNullable(input.getFormat()) - .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); - out.setFormat(formatList); - out.setId(input.getId()); - out.setOriginalId(input.getOriginalId()); - - Optional> oInst = Optional - .ofNullable(input.getInstance()); - - if (oInst.isPresent()) { - out - .setInstance( - 
oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); - - } - - Optional oL = Optional.ofNullable(input.getLanguage()); - if (oL.isPresent()) { - eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); - out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); - } - Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); - if (oLong.isPresent()) { - out.setLastupdatetimestamp(oLong.get()); - } - Optional> otitle = Optional.ofNullable(input.getTitle()); - if (otitle.isPresent()) { - List iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setMaintitle(iTitle.get(0).getValue()); - } - - iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setSubtitle(iTitle.get(0).getValue()); - } - - } - - List pids = new ArrayList<>(); - Optional - .ofNullable(input.getPid()) - .ifPresent( - value -> value - .stream() - .forEach( - p -> pids - .add( - ControlledField - .newInstance(p.getQualifier().getClassid(), p.getValue())))); - out.setPid(pids); - oStr = Optional.ofNullable(input.getDateofacceptance()); - if (oStr.isPresent()) { - out.setPublicationdate(oStr.get().getValue()); - } - oStr = Optional.ofNullable(input.getPublisher()); - if (oStr.isPresent()) { - out.setPublisher(oStr.get().getValue()); - } - - List sourceList = new ArrayList<>(); - Optional - .ofNullable(input.getSource()) - .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue()))); - // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList())); - List subjectList = new ArrayList<>(); - Optional - .ofNullable(input.getSubject()) - .ifPresent( - value -> value - .forEach(s -> subjectList.add(getSubject(s)))); - - 
out.setSubjects(subjectList); - - out.setType(input.getResulttype().getClassid()); - } - - out - .setCollectedfrom( - input - .getCollectedfrom() - .stream() - .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) - .collect(Collectors.toList())); - - return out; - - } - - private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { - CommunityInstance instance = new CommunityInstance(); - - setCommonValue(i, instance); - - instance - .setCollectedfrom( - KeyValue - .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); - - instance - .setHostedby( - KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); - - return instance; - - } - - private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { - Optional opAr = Optional - .ofNullable(i.getAccessright()); - if (opAr.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); - instance - .setAccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - } - - Optional - .ofNullable(i.getLicense()) - .ifPresent(value -> instance.setLicense(value.getValue())); - Optional - .ofNullable(i.getDateofacceptance()) - .ifPresent(value -> instance.setPublicationdate(value.getValue())); - Optional - .ofNullable(i.getRefereed()) - .ifPresent(value -> instance.setRefereed(value.getClassname())); - Optional - .ofNullable(i.getInstancetype()) - .ifPresent(value -> instance.setType(value.getClassname())); - Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); - - } - - private static Subject getSubject(StructuredProperty s) { - Subject subject = new Subject(); - subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue())); - Optional di = Optional.ofNullable(s.getDataInfo()); - if (di.isPresent()) 
{ - Provenance p = new Provenance(); - p.setProvenance(di.get().getProvenanceaction().getClassname()); - p.setTrust(di.get().getTrust()); - subject.setProvenance(p); - } - - return subject; - } - - private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) { - Author a = new Author(); - a.setFullname(oa.getFullname()); - a.setName(oa.getName()); - a.setSurname(oa.getSurname()); - a.setRank(oa.getRank()); - - Optional> oPids = Optional - .ofNullable(oa.getPid()); - if (oPids.isPresent()) { - Pid pid = getOrcid(oPids.get()); - if (pid != null) { - a.setPid(pid); - } - } - - return a; - } - - private static Pid getOrcid(List p) { - for (StructuredProperty pid : p) { - if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) { - Optional di = Optional.ofNullable(pid.getDataInfo()); - if (di.isPresent()) { - return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()), - Provenance - .newInstance( - di.get().getProvenanceaction().getClassname(), - di.get().getTrust())); - } else { - return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()) - - ); - } - - } - } - return null; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index d5f57849c..1c129ff9c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -57,9 +57,17 @@ public class VocabularyGroup implements Serializable { final String syn = arr[2].trim(); vocs.addSynonyms(vocId, termId, syn); + } } + // add the term names as synonyms + vocs.vocs.values().forEach(voc -> { + voc.getTerms().values().forEach(term -> { + voc.addSynonym(term.getName().toLowerCase(), term.getId()); + }); + }); + return vocs; } diff --git 
a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java similarity index 96% rename from dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java index ea738836b..3f65d754f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.dedup; +package eu.dnetlib.dhp.oa.merge; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; @@ -38,7 +38,7 @@ public class DispatchEntitiesSparkJob { .requireNonNull( DispatchEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json"))); + "/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java similarity index 98% rename from dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index a19f86380..e652bd5b6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.dedup; +package eu.dnetlib.dhp.oa.merge; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static 
eu.dnetlib.dhp.utils.DHPUtils.toSeq; @@ -53,7 +53,7 @@ public class GroupEntitiesSparkJob { .toString( GroupEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json")); + "/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 592580ab8..351bd2dd5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -16,6 +16,8 @@ import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import eu.dnetlib.dhp.common.vocabulary.Vocabulary; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -86,6 +88,22 @@ public class GraphCleaningFunctions extends CleaningFunctions { } public static boolean filter(T value) { + if (Boolean.TRUE + .equals( + Optional + .ofNullable(value) + .map( + o -> Optional + .ofNullable(o.getDataInfo()) + .map( + d -> Optional + .ofNullable(d.getInvisible()) + .orElse(true)) + .orElse(true)) + .orElse(true))) { + return true; + } + if (value instanceof Datasource) { // nothing to evaluate here } else if (value instanceof Project) { @@ -115,7 +133,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { return true; } - public static T cleanup(T value) { + public static T cleanup(T value, VocabularyGroup vocs) { if (value instanceof Datasource) { // nothing to clean here } else if (value instanceof Project) { @@ -212,6 +230,15 
@@ public class GraphCleaningFunctions extends CleaningFunctions { .map(GraphCleaningFunctions::cleanValue) .collect(Collectors.toList())); } + if (Objects.nonNull(r.getFormat())) { + r + .setFormat( + r + .getFormat() + .stream() + .map(GraphCleaningFunctions::cleanValue) + .collect(Collectors.toList())); + } if (Objects.nonNull(r.getDescription())) { r .setDescription( @@ -234,6 +261,38 @@ public class GraphCleaningFunctions extends CleaningFunctions { if (Objects.nonNull(r.getInstance())) { for (Instance i : r.getInstance()) { + if (!vocs.termExists(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getClassid())) { + if (r instanceof Publication) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } else if (r instanceof Dataset) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } else if (r instanceof Software) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } else if (r instanceof OtherResearchProduct) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } + } + if (Objects.nonNull(i.getPid())) { i.setPid(processPidCleaning(i.getPid())); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 720fe47fb..0a51e8600 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -47,6 +47,17 @@ public class 
OafMapperUtils { } public static Result mergeResults(Result left, Result right) { + + final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left); + final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right); + + if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) { + return left; + } + if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) { + return right; + } + if (new ResultTypeComparator().compare(left, right) < 0) { left.mergeFrom(right); return left; @@ -56,6 +67,18 @@ public class OafMapperUtils { } } + private static boolean isFromDelegatedAuthority(Result r) { + return Optional + .ofNullable(r.getInstance()) + .map( + instance -> instance + .stream() + .filter(i -> Objects.nonNull(i.getCollectedfrom())) + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) + .orElse(false); + } + public static KeyValue keyValue(final String k, final String v) { final KeyValue kv = new KeyValue(); kv.setKey(k); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json similarity index 100% rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json similarity index 100% rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json diff --git 
a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala new file mode 100644 index 000000000..f8afe9af4 --- /dev/null +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala @@ -0,0 +1,73 @@ +package eu.dnetlib.dhp.application + +import scala.io.Source + +/** This is the main Interface SparkApplication + * where all the Spark Scala class should inherit + */ +trait SparkScalaApplication { + + /** This is the path in the classpath of the json + * describes all the argument needed to run + */ + val propertyPath: String + + /** Utility to parse the arguments using the + * property json in the classpath identified from + * the variable propertyPath + * + * @param args the list of arguments + */ + def parseArguments(args: Array[String]): ArgumentApplicationParser = { + val parser = new ArgumentApplicationParser( + Source.fromInputStream(getClass.getResourceAsStream(propertyPath)).mkString + ) + parser.parseArgument(args) + parser + } + + /** Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + def run(): Unit +} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.slf4j.Logger + +abstract class AbstractScalaApplication( + val propertyPath: String, + val args: Array[String], + log: Logger +) extends SparkScalaApplication { + + var parser: ArgumentApplicationParser = null + + var spark: SparkSession = null + + def initialize(): SparkScalaApplication = { + parser = parseArguments(args) + spark = createSparkSession() + this + } + + /** Utility for creating a spark session starting from parser + * + * @return a spark Session + */ + private def createSparkSession(): SparkSession = { + require(parser != null) + + val conf: SparkConf = new SparkConf() + val master = parser.get("master") + log.info(s"Creating Spark session: Master: $master") + 
SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(master) + .getOrCreate() + } + +} diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala new file mode 100644 index 000000000..a995016a8 --- /dev/null +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -0,0 +1,442 @@ +package eu.dnetlib.dhp.sx.graph.scholix + +import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty} +import eu.dnetlib.dhp.schema.sx.scholix._ +import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology} +import eu.dnetlib.dhp.utils.DHPUtils +import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.{Encoder, Encoders} +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse +import scala.collection.JavaConverters._ +import scala.io.Source + +object ScholixUtils extends Serializable { + + val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier" + + val DATE_RELATION_KEY: String = "RelationDate" + + case class RelationVocabulary(original: String, inverse: String) {} + + case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {} + + val relations: Map[String, RelationVocabulary] = { + val input = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/scholexplorer/relation/relations.json") + ) + .mkString + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + + lazy val json: json4s.JValue = parse(input) + + json.extract[Map[String, RelationVocabulary]] + } + + def extractRelationDate(relation: Relation): String = { + + if (relation.getProperties == null || !relation.getProperties.isEmpty) + null + else { + val date = relation.getProperties.asScala + .find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)) + .map(p => 
p.getValue) + if (date.isDefined) + date.get + else + null + } + } + + def extractRelationDate(summary: ScholixSummary): String = { + + if (summary.getDate == null || summary.getDate.isEmpty) + null + else { + summary.getDate.get(0) + } + } + + def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = { + new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName) + + } + + def generateScholixResourceFromResult(r: Result): ScholixResource = { + generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r)) + } + + val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = + new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable { + override def zero: RelatedEntities = null + + override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { + val relatedDataset = if ("dataset".equalsIgnoreCase(a._2)) a._3 else 0 + val relatedPublication = if ("publication".equalsIgnoreCase(a._2)) a._3 else 0 + + if (b == null) + RelatedEntities(a._1, relatedDataset, relatedPublication) + else + RelatedEntities( + a._1, + b.relatedDataset + relatedDataset, + b.relatedPublication + relatedPublication + ) + } + + override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = { + if (b1 != null && b2 != null) + RelatedEntities( + b1.id, + b1.relatedDataset + b2.relatedDataset, + b1.relatedPublication + b2.relatedPublication + ) + else if (b1 != null) + b1 + else + b2 + } + + override def finish(reduction: RelatedEntities): RelatedEntities = reduction + + override def bufferEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities]) + + override def outputEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities]) + } + + val scholixAggregator: Aggregator[(String, Scholix), Scholix, Scholix] = + new Aggregator[(String, Scholix), Scholix, Scholix] with Serializable { + override def zero: Scholix = null + + def 
scholix_complete(s: Scholix): Boolean = { + if (s == null || s.getIdentifier == null) { + false + } else if (s.getSource == null || s.getTarget == null) { + false + } else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty) + false + else + true + } + + override def reduce(b: Scholix, a: (String, Scholix)): Scholix = { + if (scholix_complete(b)) b else a._2 + } + + override def merge(b1: Scholix, b2: Scholix): Scholix = { + if (scholix_complete(b1)) b1 else b2 + } + + override def finish(reduction: Scholix): Scholix = reduction + + override def bufferEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + + override def outputEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + } + + def createInverseScholixRelation(scholix: Scholix): Scholix = { + val s = new Scholix + s.setPublicationDate(scholix.getPublicationDate) + s.setPublisher(scholix.getPublisher) + s.setLinkprovider(scholix.getLinkprovider) + s.setRelationship(inverseRelationShip(scholix.getRelationship)) + s.setSource(scholix.getTarget) + s.setTarget(scholix.getSource) + s.setIdentifier( + DHPUtils.md5( + s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}" + ) + ) + s + + } + + def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = { + if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) { + val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { d => + new ScholixEntityId(d.getProvider.getName, d.getProvider.getIdentifiers) + }(collection.breakOut) + l + } else List() + } + + def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = { + if (summary.getDatasources != null && !summary.getDatasources.isEmpty) { + val l: List[ScholixEntityId] = summary.getDatasources.asScala.map { d => + new ScholixEntityId( + d.getDatasourceName, + List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", null)).asJava + ) + }(collection.breakOut) + l + } else List() + } + + def 
extractCollectedFrom(relation: Relation): List[ScholixEntityId] = { + if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) { + + val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { c => + new ScholixEntityId( + c.getValue, + List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava + ) + }.toList + l + } else List() + } + + def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = { + val s = new Scholix + s.setPublicationDate(scholix.getPublicationDate) + s.setPublisher(scholix.getPublisher) + s.setLinkprovider(scholix.getLinkprovider) + s.setRelationship(scholix.getRelationship) + s.setSource(scholix.getSource) + s.setTarget(generateScholixResourceFromSummary(target)) + s.setIdentifier( + DHPUtils.md5( + s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}" + ) + ) + s + } + + def generateCompleteScholix(scholix: Scholix, target: ScholixResource): Scholix = { + val s = new Scholix + s.setPublicationDate(scholix.getPublicationDate) + s.setPublisher(scholix.getPublisher) + s.setLinkprovider(scholix.getLinkprovider) + s.setRelationship(scholix.getRelationship) + s.setSource(scholix.getSource) + s.setTarget(target) + s.setIdentifier( + DHPUtils.md5( + s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}" + ) + ) + s + } + + def generateScholixResourceFromSummary(summaryObject: ScholixSummary): ScholixResource = { + val r = new ScholixResource + r.setIdentifier(summaryObject.getLocalIdentifier) + r.setDnetIdentifier(summaryObject.getId) + + r.setObjectType(summaryObject.getTypology.toString) + r.setObjectSubType(summaryObject.getSubType) + + if (summaryObject.getTitle != null && !summaryObject.getTitle.isEmpty) + r.setTitle(summaryObject.getTitle.get(0)) + + if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) { + val l: List[ScholixEntityId] = + summaryObject.getAuthor.asScala.map(a => new 
ScholixEntityId(a, null)).toList + if (l.nonEmpty) + r.setCreator(l.asJava) + } + + if (summaryObject.getDate != null && !summaryObject.getDate.isEmpty) + r.setPublicationDate(summaryObject.getDate.get(0)) + if (summaryObject.getPublisher != null && !summaryObject.getPublisher.isEmpty) { + val plist: List[ScholixEntityId] = + summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList + + if (plist.nonEmpty) + r.setPublisher(plist.asJava) + } + + if (summaryObject.getDatasources != null && !summaryObject.getDatasources.isEmpty) { + + val l: List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala + .map(c => + new ScholixCollectedFrom( + new ScholixEntityId( + c.getDatasourceName, + List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava + ), + "collected", + "complete" + ) + ) + .toList + + if (l.nonEmpty) + r.setCollectedFrom(l.asJava) + + } + r + } + + def scholixFromSource(relation: Relation, source: ScholixResource): Scholix = { + if (relation == null || source == null) + return null + val s = new Scholix + var l: List[ScholixEntityId] = extractCollectedFrom(relation) + if (l.isEmpty) + l = extractCollectedFrom(source) + if (l.isEmpty) + return null + s.setLinkprovider(l.asJava) + var d = extractRelationDate(relation) + if (d == null) + d = source.getPublicationDate + + s.setPublicationDate(d) + + if (source.getPublisher != null && !source.getPublisher.isEmpty) { + s.setPublisher(source.getPublisher) + } + + val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) + if (semanticRelation == null) + return null + s.setRelationship( + new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse) + ) + s.setSource(source) + + s + } + + def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = { + + if (relation == null || source == null) + return null + + val s = new Scholix + + var l: List[ScholixEntityId] = 
extractCollectedFrom(relation) + if (l.isEmpty) + l = extractCollectedFrom(source) + if (l.isEmpty) + return null + + s.setLinkprovider(l.asJava) + + var d = extractRelationDate(relation) + if (d == null) + d = extractRelationDate(source) + + s.setPublicationDate(d) + + if (source.getPublisher != null && !source.getPublisher.isEmpty) { + val l: List[ScholixEntityId] = source.getPublisher.asScala + .map { p => + new ScholixEntityId(p, null) + }(collection.breakOut) + + if (l.nonEmpty) + s.setPublisher(l.asJava) + } + + val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) + if (semanticRelation == null) + return null + s.setRelationship( + new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse) + ) + s.setSource(generateScholixResourceFromSummary(source)) + + s + } + + def findURLForPID( + pidValue: List[StructuredProperty], + urls: List[String] + ): List[(StructuredProperty, String)] = { + pidValue.map { p => + val pv = p.getValue + + val r = urls.find(u => u.toLowerCase.contains(pv.toLowerCase)) + (p, r.orNull) + } + } + + def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = { + if (r.getInstance() == null || r.getInstance().isEmpty) + return List() + r.getInstance() + .asScala + .filter(i => i.getUrl != null && !i.getUrl.isEmpty) + .filter(i => i.getPid != null && i.getUrl != null) + .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) + .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)) + .distinct + .toList + } + + def resultToSummary(r: Result): ScholixSummary = { + val s = new ScholixSummary + s.setId(r.getId) + if (r.getPid == null || r.getPid.isEmpty) + return null + + val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) + if (persistentIdentifiers.isEmpty) + return null + s.setLocalIdentifier(persistentIdentifiers.asJava) + if (r.isInstanceOf[Publication]) + 
s.setTypology(Typology.publication) + else + s.setTypology(Typology.dataset) + + s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) + + if (r.getTitle != null && r.getTitle.asScala.nonEmpty) { + val titles: List[String] = r.getTitle.asScala.map(t => t.getValue).toList + if (titles.nonEmpty) + s.setTitle(titles.asJava) + else + return null + } + + if (r.getAuthor != null && !r.getAuthor.isEmpty) { + val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname).toList + if (authors.nonEmpty) + s.setAuthor(authors.asJava) + } + if (r.getInstance() != null) { + val dt: List[String] = r + .getInstance() + .asScala + .filter(i => i.getDateofacceptance != null) + .map(i => i.getDateofacceptance.getValue) + .toList + if (dt.nonEmpty) + s.setDate(dt.distinct.asJava) + } + if (r.getDescription != null && !r.getDescription.isEmpty) { + val d = r.getDescription.asScala.find(f => f != null && f.getValue != null) + if (d.isDefined) + s.setDescription(d.get.getValue) + } + + if (r.getSubject != null && !r.getSubject.isEmpty) { + val subjects: List[SchemeValue] = r.getSubject.asScala + .map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue)) + .toList + if (subjects.nonEmpty) + s.setSubject(subjects.asJava) + } + + if (r.getPublisher != null) + s.setPublisher(List(r.getPublisher.getValue).asJava) + + if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) { + val cf: List[CollectedFromType] = r.getCollectedfrom.asScala + .map(c => new CollectedFromType(c.getValue, c.getKey, "complete")) + .toList + if (cf.nonEmpty) + s.setDatasources(cf.distinct.asJava) + } + + s.setRelatedDatasets(0) + s.setRelatedPublications(0) + s.setRelatedUnknown(0) + + s + } + +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 4068f0abb..79629a171 100644 --- 
a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -107,7 +107,7 @@ class OafMapperUtilsTest { assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get()); assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get()); assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get()); - assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get()); + assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get()); assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get()); assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get()); assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get()); @@ -185,6 +185,22 @@ class OafMapperUtilsTest { .getClassid()); } + @Test + void testDelegatedAuthority() throws IOException { + Dataset d1 = read("dataset_2.json", Dataset.class); + Dataset d2 = read("dataset_delegated.json", Dataset.class); + + assertEquals(1, d2.getCollectedfrom().size()); + assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID)); + + Result res = OafMapperUtils.mergeResults(d1, d2); + + assertEquals(d2, res); + + System.out.println(OBJECT_MAPPER.writeValueAsString(res)); + + } + protected HashSet cfId(List collectedfrom) { return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new)); } diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json index 52e4e126a..c880edb7d 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json +++ 
b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json @@ -1 +1,140 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]} \ No newline at end of file +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", + "resuttype": {"classid": "dataset"}, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2011.03.013" + }, + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ], + "collectedfrom": [ + { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", + "value": "Repository B" + } + ], + "instance": [ + { + "refereed": { + "classid": "0000", + "classname": "UNKNOWN", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, + "hostedby": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "processingchargecurrency": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "EUR" + }, + 
"pid": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "Digital Object Identifier", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1371/journal.pone.0085605" + } + ], + "distributionlocation": "", + "url": ["https://doi.org/10.1371/journal.pone.0085605"], + "alternateIdentifier": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmid", + "classname": "PubMed ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "24454899.0" + } + ], + "collectedfrom": { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", + "value": "Repository B" + }, + "processingchargeamount": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "1022.02" + }, + "instancetype": { + "classid": "0004", + "classname": "Conference object", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + } + } + ] +} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json 
b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json new file mode 100644 index 000000000..967c1181b --- /dev/null +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json @@ -0,0 +1,140 @@ +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", + "resuttype": {"classid": "dataset"}, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2011.03.013" + }, + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ], + "collectedfrom": [ + { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + } + ], + "instance": [ + { + "refereed": { + "classid": "0000", + "classname": "UNKNOWN", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, + "hostedby": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "processingchargecurrency": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "EUR" + }, + "pid": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": 
"doi", + "classname": "Digital Object Identifier", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1371/journal.pone.0085605" + } + ], + "distributionlocation": "", + "url": ["https://doi.org/10.1371/journal.pone.0085605"], + "alternateIdentifier": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmid", + "classname": "PubMed ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "24454899.0" + } + ], + "collectedfrom": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "processingchargeamount": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "1022.02" + }, + "instancetype": { + "classid": "0004", + "classname": "Conference object", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + } + } + ] +} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java similarity index 52% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index c508d4dbc..b790d90cb 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.createunresolvedentities; +package eu.dnetlib.dhp.actionmanager; import java.util.Optional; @@ -11,19 +11,29 @@ import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class Constants { public static final String DOI = "doi"; + public static final String DOI_CLASSNAME = "Digital Object Identifier"; + + public static final String DEFAULT_DELIMITER = ","; public static final String UPDATE_DATA_INFO_TYPE = "update"; public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE"; public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; + public static final String UPDATE_SUBJECT_SDG_CLASS_ID = "subject:sdg"; public static final String FOS_CLASS_ID = "FOS"; public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification"; + public static final String SDG_CLASS_ID = "SDG"; + public static final String SDG_CLASS_NAME = "Sustainable Development Goals"; + public static final String NULL = "NULL"; public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @@ -46,4 +56,37 @@ public class Constants { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } + public static StructuredProperty getSubject(String sbj, String classid, String classname, + String diqualifierclassid) { + if (sbj.equals(NULL)) + return null; + StructuredProperty sp = new StructuredProperty(); + sp.setValue(sbj); + sp 
+ .setQualifier( + OafMapperUtils + .qualifier( + classid, + classname, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES)); + sp + .setDataInfo( + OafMapperUtils + .dataInfo( + false, + UPDATE_DATA_INFO_TYPE, + true, + false, + OafMapperUtils + .qualifier( + diqualifierclassid, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "")); + + return sp; + + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java deleted file mode 100644 index a48b84a33..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java +++ /dev/null @@ -1,86 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.bipfinder; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Objects; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.oaf.Result; - -/** - * Just collects all the atomic actions produced for the different results and saves them in - * outputpath for the ActionSet - */ -public class CollectAndSave implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); - - public static void main(String[] args) throws Exception { - - String jsonConfiguration = IOUtils - 
.toString( - Objects - .requireNonNull( - CollectAndSave.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"))); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}: ", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}: ", outputPath); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - collectAndSave(spark, inputPath, outputPath); - }); - } - - private static void collectAndSave(SparkSession spark, String inputPath, String outputPath) { - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - sc - .sequenceFile(inputPath + "/publication", Text.class, Text.class) - .union(sc.sequenceFile(inputPath + "/dataset", Text.class, Text.class)) - .union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class)) - .union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class)) - .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); - } - - private static void removeOutputDir(SparkSession spark, String path) { - HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java deleted file mode 100644 index 493e94417..000000000 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java +++ /dev/null @@ -1,28 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.bipfinder; - -import java.io.Serializable; - -/** - * Subset of the information of the generic results that are needed to create the atomic action - */ -public class PreparedResult implements Serializable { - private String id; // openaire id - private String value; // doi - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index f178451c1..ddf5f4adf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; +import static eu.dnetlib.dhp.actionmanager.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -15,7 +16,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; @@ -24,11 +24,15 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize; +import 
eu.dnetlib.dhp.actionmanager.bipmodel.BipScore; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; /** @@ -46,7 +50,7 @@ public class SparkAtomicActionScoreJob implements Serializable { .toString( SparkAtomicActionScoreJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json")); + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -65,14 +69,6 @@ public class SparkAtomicActionScoreJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath {}: ", outputPath); - final String bipScorePath = parser.get("bipScorePath"); - log.info("bipScorePath: {}", bipScorePath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - Class inputClazz = (Class) Class.forName(resultClassName); - SparkConf conf = new SparkConf(); runWithSparkSession( @@ -80,12 +76,11 @@ public class SparkAtomicActionScoreJob implements Serializable { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - prepareResults(spark, inputPath, outputPath, bipScorePath, inputClazz); + prepareResults(spark, inputPath, outputPath); }); } - private static void prepareResults(SparkSession spark, String inputPath, String outputPath, - String bipScorePath, Class inputClazz) { + private static void prepareResults(SparkSession spark, String bipScorePath, String outputPath) { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -101,41 +96,19 @@ public class SparkAtomicActionScoreJob implements 
Serializable { return bs; }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); - Dataset results = readPath(spark, inputPath, inputClazz); - - results.createOrReplaceTempView("result"); - - Dataset preparedResult = spark - .sql( - "select pIde.value value, id " + - "from result " + - "lateral view explode (pid) p as pIde " + - "where dataInfo.deletedbyinference = false and pIde.qualifier.classid = '" + DOI + "'") - .as(Encoders.bean(PreparedResult.class)); - bipScores - .joinWith( - preparedResult, bipScores.col("id").equalTo(preparedResult.col("value")), - "inner") - .map((MapFunction, BipScore>) value -> { - BipScore ret = value._1(); - ret.setId(value._2().getId()); - return ret; - }, Encoders.bean(BipScore.class)) - .groupByKey((MapFunction) BipScore::getId, Encoders.STRING()) - .mapGroups((MapGroupsFunction) (k, it) -> { - Result ret = new Result(); - ret.setDataInfo(getDataInfo()); - BipScore first = it.next(); - ret.setId(first.getId()); - ret.setMeasures(getMeasure(first)); - it.forEachRemaining(value -> ret.getMeasures().addAll(getMeasure(value))); + .map((MapFunction) bs -> { + Result ret = new Result(); + + ret.setId(bs.getId()); + + ret.setMeasures(getMeasure(bs)); return ret; }, Encoders.bean(Result.class)) .toJavaRDD() - .map(p -> new AtomicAction(inputClazz, p)) + .map(p -> new AtomicAction(Result.class, p)) .mapToPair( aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), new Text(OBJECT_MAPPER.writeValueAsString(aa)))) @@ -159,7 +132,21 @@ public class SparkAtomicActionScoreJob implements Serializable { KeyValue kv = new KeyValue(); kv.setValue(unit.getValue()); kv.setKey(unit.getKey()); - kv.setDataInfo(getDataInfo()); + kv + .setDataInfo( + OafMapperUtils + .dataInfo( + false, + UPDATE_DATA_INFO_TYPE, + true, + false, + OafMapperUtils + .qualifier( + UPDATE_MEASURE_BIP_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "")); return kv; }) 
.collect(Collectors.toList())); @@ -168,21 +155,6 @@ public class SparkAtomicActionScoreJob implements Serializable { .collect(Collectors.toList()); } - private static DataInfo getDataInfo() { - DataInfo di = new DataInfo(); - di.setInferred(false); - di.setInvisible(false); - di.setDeletedbyinference(false); - di.setTrust(""); - Qualifier qualifier = new Qualifier(); - qualifier.setClassid("sysimport:actionset"); - qualifier.setClassname("Harvested"); - qualifier.setSchemename("dnet:provenanceActions"); - qualifier.setSchemeid("dnet:provenanceActions"); - di.setProvenanceaction(qualifier); - return di; - } - private static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipDeserialize.java similarity index 91% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipDeserialize.java index d5b2ced7c..a70bca618 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipDeserialize.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.bipmodel; import java.io.Serializable; import java.util.ArrayList; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipScore.java similarity index 91% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java rename to 
dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipScore.java index 247546694..1ce20eaf4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/BipScore.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.bipmodel; import java.io.Serializable; import java.util.List; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/KeyValue.java similarity index 87% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/KeyValue.java index 6909a9634..33efc8ea0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/KeyValue.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.bipmodel; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Score.java similarity index 89% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Score.java index 7cc21b44d..5b3095cf2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipmodel/Score.java @@ -1,5 
+1,5 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.bipmodel; import java.io.Serializable; import java.util.List; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java deleted file mode 100644 index 9dec3e862..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java +++ /dev/null @@ -1,77 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities; - -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.Serializable; -import java.util.Objects; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.GetCSV; - -public class GetFOSData implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(GetFOSData.class); - - public static final char DEFAULT_DELIMITER = '\t'; - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - GetFOSData.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json")))); - - parser.parseArgument(args); - - // the path where the original fos csv file is stored - final String sourcePath = parser.get("sourcePath"); - log.info("sourcePath {}", sourcePath); - - // the path where to put the file as json - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}", outputPath); - - 
final String hdfsNameNode = parser.get("hdfsNameNode"); - log.info("hdfsNameNode {}", hdfsNameNode); - - final String classForName = parser.get("classForName"); - log.info("classForName {}", classForName); - - final char delimiter = Optional - .ofNullable(parser.get("delimiter")) - .map(s -> s.charAt(0)) - .orElse(DEFAULT_DELIMITER); - log.info("delimiter {}", delimiter); - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - - new GetFOSData().doRewrite(sourcePath, outputPath, classForName, delimiter, fileSystem); - - } - - public void doRewrite(String inputPath, String outputFile, String classForName, char delimiter, FileSystem fs) - throws IOException, ClassNotFoundException { - - // reads the csv and writes it as its json equivalent - try (InputStreamReader reader = new InputStreamReader(fs.open(new Path(inputPath)))) { - GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); - } - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java new file mode 100644 index 000000000..75fe42e90 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java @@ -0,0 +1,91 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; +import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.spark.SparkConf; +import 
org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetFOSSparkJob implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(GetFOSSparkJob.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + GetFOSSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + // the path where the original fos csv file is stored + final String sourcePath = parser.get("sourcePath"); + log.info("sourcePath {}", sourcePath); + + // the path where to put the file as json + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + final String delimiter = Optional + .ofNullable(parser.get("delimiter")) + .orElse(DEFAULT_DELIMITER); + + SparkConf sconf = new SparkConf(); + runWithSparkSession( + sconf, + isSparkSessionManaged, + spark -> { + getFOS( + spark, + sourcePath, + outputPath, + delimiter); + }); + } + + private static void getFOS(SparkSession spark, String sourcePath, String outputPath, String delimiter) { + Dataset fosData = spark + .read() + .format("csv") + .option("sep", delimiter) + .option("inferSchema", "true") + .option("header", "true") + .option("quotes", "\"") + .load(sourcePath); + + fosData.map((MapFunction) r -> { + FOSDataModel fosDataModel = new FOSDataModel(); + fosDataModel.setDoi(r.getString(0).toLowerCase()); + fosDataModel.setLevel1(r.getString(1)); + 
fosDataModel.setLevel2(r.getString(2)); + fosDataModel.setLevel3(r.getString(3)); + return fosDataModel; + }, Encoders.bean(FOSDataModel.class)) + .write() + .mode(SaveMode.Overwrite) + .json(outputPath); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java new file mode 100644 index 000000000..328075389 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetSDGSparkJob.java @@ -0,0 +1,91 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; +import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetSDGSparkJob implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(GetSDGSparkJob.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + GetSDGSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json")); + + final ArgumentApplicationParser parser = new 
ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + // the path where the original fos csv file is stored + final String sourcePath = parser.get("sourcePath"); + log.info("sourcePath {}", sourcePath); + + // the path where to put the file as json + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + final String delimiter = Optional + .ofNullable(parser.get("delimiter")) + .orElse(DEFAULT_DELIMITER); + + SparkConf sconf = new SparkConf(); + runWithSparkSession( + sconf, + isSparkSessionManaged, + spark -> { + getSDG( + spark, + sourcePath, + outputPath, + delimiter); + }); + } + + private static void getSDG(SparkSession spark, String sourcePath, String outputPath, String delimiter) { + Dataset sdgData = spark + .read() + .format("csv") + .option("sep", delimiter) + .option("inferSchema", "true") + .option("header", "true") + .option("quotes", "\"") + .load(sourcePath); + + sdgData.map((MapFunction) r -> { + SDGDataModel sdgDataModel = new SDGDataModel(); + sdgDataModel.setDoi(r.getString(0).toLowerCase()); + sdgDataModel.setSbj(r.getString(1)); + + return sdgDataModel; + }, Encoders.bean(SDGDataModel.class)) + .filter((FilterFunction) sdg -> sdg.getSbj() != null) + .write() + .mode(SaveMode.Overwrite) + .json(outputPath); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 3d68db27b..80573c71a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java 
@@ -1,17 +1,17 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.UPDATE_CLASS_NAME; +import static eu.dnetlib.dhp.actionmanager.Constants.*; +import static eu.dnetlib.dhp.actionmanager.Constants.UPDATE_CLASS_NAME; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Arrays; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.hdfs.client.HdfsUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -24,14 +24,16 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.BipDeserialize; -import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.BipScore; +import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize; +import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Measure; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; @@ -40,7 +42,7 @@ public class PrepareBipFinder implements Serializable { private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void 
main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( @@ -76,7 +78,7 @@ public class PrepareBipFinder implements Serializable { }); } - private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { + private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -89,13 +91,44 @@ public class PrepareBipFinder implements Serializable { BipScore bs = new BipScore(); bs.setId(key); bs.setScoreList(entry.get(key)); + return bs; }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) .map((MapFunction) v -> { Result r = new Result(); + final String cleanedPid = CleaningFunctions.normalizePidValue(DOI, v.getId()); r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), DOI)); - r.setMeasures(getMeasure(v)); + Instance inst = new Instance(); + inst.setMeasures(getMeasure(v)); + + inst + .setPid( + Arrays + .asList( + OafMapperUtils + .structuredProperty( + cleanedPid, + OafMapperUtils + .qualifier( + DOI, DOI_CLASSNAME, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES), + null))); + r.setInstance(Arrays.asList(inst)); + r + .setDataInfo( + OafMapperUtils + .dataInfo( + false, null, true, + false, + OafMapperUtils + .qualifier( + ModelConstants.PROVENANCE_ENRICH, + null, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + null)); return r; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 5ae2f8c88..55e391932 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.actionmanager.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -10,8 +10,8 @@ import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -67,20 +67,36 @@ public class PrepareFOSSparkJob implements Serializable { private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); - fosDataset.flatMap((FlatMapFunction) v -> { - List fosList = new ArrayList<>(); - final String level1 = v.getLevel1(); - final String level2 = v.getLevel2(); - final String level3 = v.getLevel3(); - Arrays - .stream(v.getDoi().split("\u0002")) - .forEach(d -> fosList.add(FOSDataModel.newInstance(d, level1, level2, level3))); - return fosList.iterator(); - }, Encoders.bean(FOSDataModel.class)) - .map((MapFunction) value -> { + fosDataset + .groupByKey((MapFunction) v -> v.getDoi().toLowerCase(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { Result r = new Result(); - r.setId(DHPUtils.generateUnresolvedIdentifier(value.getDoi(), DOI)); - r.setSubject(getSubjects(value)); + FOSDataModel first = it.next(); + r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); + + HashSet level1 = new HashSet<>(); + HashSet level2 = new HashSet<>(); + HashSet level3 = new 
HashSet<>(); + addLevels(level1, level2, level3, first); + it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); + List sbjs = new ArrayList<>(); + level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); + level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); + level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); + r.setSubject(sbjs); + r + .setDataInfo( + OafMapperUtils + .dataInfo( + false, null, true, + false, + OafMapperUtils + .qualifier( + ModelConstants.PROVENANCE_ENRICH, + null, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + null)); return r; }, Encoders.bean(Result.class)) .write() @@ -89,45 +105,11 @@ public class PrepareFOSSparkJob implements Serializable { .json(outputPath + "/fos"); } - private static List getSubjects(FOSDataModel fos) { - return Arrays - .asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3())) - .stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - } - - private static StructuredProperty getSubject(String sbj) { - if (sbj.equals(NULL)) - return null; - StructuredProperty sp = new StructuredProperty(); - sp.setValue(sbj); - sp - .setQualifier( - OafMapperUtils - .qualifier( - FOS_CLASS_ID, - FOS_CLASS_NAME, - ModelConstants.DNET_SUBJECT_TYPOLOGIES, - ModelConstants.DNET_SUBJECT_TYPOLOGIES)); - sp - .setDataInfo( - OafMapperUtils - .dataInfo( - false, - UPDATE_DATA_INFO_TYPE, - true, - false, - OafMapperUtils - .qualifier( - UPDATE_SUBJECT_FOS_CLASS_ID, - UPDATE_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, - ModelConstants.DNET_PROVENANCE_ACTIONS), - "")); - - return sp; - + private static void addLevels(HashSet level1, HashSet level2, HashSet level3, + FOSDataModel first) { + level1.add(first.getLevel1()); + level2.add(first.getLevel2()); + level3.add(first.getLevel3()); } 
} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java new file mode 100644 index 000000000..a31e380fe --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java @@ -0,0 +1,104 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import static eu.dnetlib.dhp.actionmanager.Constants.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.utils.DHPUtils; + +public class PrepareSDGSparkJob implements Serializable { + private static final Logger log = LoggerFactory.getLogger(PrepareSDGSparkJob.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + PrepareSDGSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json")); + + final ArgumentApplicationParser 
parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String sourcePath = parser.get("sourcePath"); + log.info("sourcePath: {}", sourcePath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + doPrepare( + spark, + sourcePath, + + outputPath); + }); + } + + private static void doPrepare(SparkSession spark, String sourcePath, String outputPath) { + Dataset sdgDataset = readPath(spark, sourcePath, SDGDataModel.class); + + sdgDataset + .groupByKey((MapFunction) r -> r.getDoi().toLowerCase(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { + Result r = new Result(); + r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); + SDGDataModel first = it.next(); + List sbjs = new ArrayList<>(); + sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)); + it + .forEachRemaining( + s -> sbjs + .add(getSubject(s.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID))); + r.setSubject(sbjs); + r + .setDataInfo( + OafMapperUtils + .dataInfo( + false, null, true, + false, + OafMapperUtils + .qualifier( + ModelConstants.PROVENANCE_ENRICH, + null, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + null)); + return r; + }, Encoders.bean(Result.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/sdg"); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java index 62b813602..3b9775094 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.actionmanager.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -20,7 +21,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Result; public class SparkSaveUnresolved implements Serializable { - private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class); + private static final Logger log = LoggerFactory.getLogger(SparkSaveUnresolved.class); public static void main(String[] args) throws Exception { @@ -64,10 +65,22 @@ public class SparkSaveUnresolved implements Serializable { .map( (MapFunction) l -> OBJECT_MAPPER.readValue(l, Result.class), Encoders.bean(Result.class)) - .groupByKey((MapFunction) r -> r.getId(), Encoders.STRING()) + .groupByKey((MapFunction) Result::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = it.next(); - it.forEachRemaining(r -> ret.mergeFrom(r)); + it.forEachRemaining(r -> { + if (r.getInstance() != null) { + ret.setInstance(r.getInstance()); + } + if (r.getSubject() != null) { + if (ret.getSubject() != null) + ret.getSubject().addAll(r.getSubject()); + else + ret.setSubject(r.getSubject()); + } + + // ret.mergeFrom(r) + }); return ret; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipDeserialize.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipDeserialize.java deleted file mode 100644 index f950d9260..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipDeserialize.java +++ /dev/null @@ -1,28 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -/** - * Class that maps the model of the bipFinder! input data. - * Only needed for deserialization purposes - */ - -public class BipDeserialize extends HashMap> implements Serializable { - - public BipDeserialize() { - super(); - } - - public List get(String key) { - - if (super.get(key) == null) { - return new ArrayList<>(); - } - return super.get(key); - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipScore.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipScore.java deleted file mode 100644 index c36856a5b..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipScore.java +++ /dev/null @@ -1,30 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; - -import java.io.Serializable; -import java.util.List; - -/** - * Rewriting of the bipFinder input data by extracting the identifier of the result (doi) - */ - -public class BipScore implements Serializable { - private String id; // doi - private List scoreList; // unit as given in the inputfile - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getScoreList() { - return scoreList; - } - - public void setScoreList(List scoreList) { - this.scoreList = scoreList; - } -} diff --git 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java index befb230cb..e98ba74a1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java @@ -6,19 +6,19 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByPosition; public class FOSDataModel implements Serializable { - @CsvBindByPosition(position = 1) + @CsvBindByPosition(position = 0) // @CsvBindByName(column = "doi") private String doi; - @CsvBindByPosition(position = 2) + @CsvBindByPosition(position = 1) // @CsvBindByName(column = "level1") private String level1; - @CsvBindByPosition(position = 3) + @CsvBindByPosition(position = 2) // @CsvBindByName(column = "level2") private String level2; - @CsvBindByPosition(position = 4) + @CsvBindByPosition(position = 3) // @CsvBindByName(column = "level3") private String level3; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java deleted file mode 100644 index 4384e4ba1..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java +++ /dev/null @@ -1,26 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; - -import java.io.Serializable; - -public class KeyValue implements Serializable { - - private String key; - private String value; - - public String getKey() { - return key; - } - - public void setKey(String key) { - this.key = key; - } - - public String getValue() { - 
return value; - } - - public void setValue(String value) { - this.value = value; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java new file mode 100644 index 000000000..98ba5045c --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/SDGDataModel.java @@ -0,0 +1,47 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByPosition; + +public class SDGDataModel implements Serializable { + + @CsvBindByPosition(position = 0) +// @CsvBindByName(column = "doi") + private String doi; + + @CsvBindByPosition(position = 1) +// @CsvBindByName(column = "sdg") + private String sbj; + + public SDGDataModel() { + + } + + public SDGDataModel(String doi, String sbj) { + this.doi = doi; + this.sbj = sbj; + + } + + public static SDGDataModel newInstance(String d, String sbj) { + return new SDGDataModel(d, sbj); + } + + public String getDoi() { + return doi; + } + + public void setDoi(String doi) { + this.doi = doi; + } + + public String getSbj() { + return sbj; + } + + public void setSbj(String sbj) { + this.sbj = sbj; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java deleted file mode 100644 index 3d1cca9a0..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java +++ /dev/null @@ -1,30 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; - -import java.io.Serializable; -import java.util.List; - -/** - * represents the 
score in the input file - */ -public class Score implements Serializable { - - private String id; - private List unit; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getUnit() { - return unit; - } - - public void setUnit(List unit) { - this.unit = unit; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index ea5fea96f..f230a7fd7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -14,6 +14,7 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; @@ -21,6 +22,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -83,10 +85,13 @@ public class CreateActionSetSparkJob implements Serializable { private static void extractContent(SparkSession spark, String inputPath, String outputPath, boolean shouldDuplicateRels) { spark - .sqlContext() - .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) + .read() + .textFile(inputPath + "/*") + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, 
COCI.class), + Encoders.bean(COCI.class)) .flatMap( - (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), + (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), Encoders.bean(Relation.class)) .filter((FilterFunction) value -> value != null) .toJavaRDD() @@ -98,26 +103,29 @@ public class CreateActionSetSparkJob implements Serializable { } - private static List createRelation(String value, boolean duplicate) { - String[] line = value.split(","); - if (!line[1].startsWith("10.")) { - return new ArrayList<>(); - } + private static List createRelation(COCI value, boolean duplicate) { + List relationList = new ArrayList<>(); - String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[1])); - final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[2])); + String citing = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting())); + final String cited = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); - relationList - .addAll( - getRelations( - citing, - cited)); + if(!citing.equals(cited)){ + relationList + .addAll( + getRelations( + citing, + cited)); - if (duplicate && line[1].endsWith(".refs")) { - citing = ID_PREFIX + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue("doi", line[1].substring(0, line[1].indexOf(".refs")))); - relationList.addAll(getRelations(citing, cited)); + if (duplicate && value.getCiting().endsWith(".refs")) { + citing = ID_PREFIX + IdentifierFactory + .md5( + CleaningFunctions + .normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); + relationList.addAll(getRelations(citing, cited)); + } } return relationList; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
new file mode 100644
index 000000000..3fd6e8e9a
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java
@@ -0,0 +1,129 @@
+
+package eu.dnetlib.dhp.actionmanager.opencitations;
+
+import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER;
+import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged;
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+/**
+ * Spark job that converts gzipped COCI CSV dumps into gzip-compressed JSON files of {@link COCI} records.
+ * For every name listed in the {@code inputFile} argument it reads {@code workingPath/NAME.gz} and writes the
+ * converted records to {@code outputPath + NAME}.
+ */
+public class ReadCOCI implements Serializable {
+
+    private static final Logger log = LoggerFactory.getLogger(ReadCOCI.class);
+
+    /**
+     * Parses the command line and runs {@link #doRead} inside a Spark session.
+     *
+     * @param args arguments described by {@code input_readcoci_parameters.json}; {@code outputPath},
+     *            {@code inputFile} (a ';'-separated list of names), {@code workingPath} and the optional
+     *            {@code delimiter} (falls back to {@code DEFAULT_DELIMITER}) are read here
+     * @throws Exception if the arguments cannot be parsed or the Spark job fails
+     */
+    public static void main(String[] args) throws Exception {
+        String jsonConfiguration = IOUtils
+            .toString(
+                ReadCOCI.class
+                    .getResourceAsStream(
+                        "/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json"));
+
+        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+        parser.parseArgument(args);
+
+        final String outputPath = parser.get("outputPath");
+        log.info("outputPath: {}", outputPath);
+
+        final String[] inputFile = parser.get("inputFile").split(";");
+        // An array's toString() is only its type and identity hash, so log the actual names instead.
+        log.info("inputFile {}", String.join(";", inputFile));
+        Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
+        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+        final String workingPath = parser.get("workingPath");
+        log.info("workingPath {}", workingPath);
+
+        SparkConf sconf = new SparkConf();
+
+        final String delimiter = Optional
+            .ofNullable(parser.get("delimiter"))
+            .orElse(DEFAULT_DELIMITER);
+
+        runWithSparkSession(
+            sconf,
+            isSparkSessionManaged,
+            spark -> {
+                doRead(
+                    spark,
+                    workingPath,
+                    inputFile,
+                    outputPath,
+                    delimiter);
+            });
+    }
+
+    /**
+     * Converts each input file from delimited text to gzip-compressed JSON, overwriting existing output.
+     *
+     * @param spark       session used to read and write the data
+     * @param workingPath directory holding the {@code NAME.gz} inputs
+     * @param inputFiles  input names, without the {@code .gz} suffix
+     * @param outputPath  prefix of the output location; the input name is appended as-is, no separator is added
+     * @param delimiter   field separator of the input files
+     * @throws IOException declared by the signature; nothing in the visible body throws it directly
+     */
+    private static void doRead(SparkSession spark, String workingPath, String[] inputFiles,
+        String outputPath,
+        String delimiter) throws IOException {
+
+        for (String inputFile : inputFiles) {
+            String p_string = workingPath + "/" + inputFile + ".gz";
+
+            Dataset<Row> cociData = spark
+                .read()
+                .format("csv")
+                .option("sep", delimiter)
+                .option("inferSchema", "true")
+                .option("header", "true")
+                // Spark's CSV reader names this option "quote"; the former "quotes" key is not a CSV option.
+                .option("quote", "\"")
+                .load(p_string)
+                .repartition(100);
+
+            // Only the first three columns are used; they are read by position as strings.
+            cociData.map((MapFunction<Row, COCI>) row -> {
+                COCI coci = new COCI();
+                coci.setOci(row.getString(0));
+                coci.setCiting(row.getString(1));
+                coci.setCited(row.getString(2));
+                return coci;
+            }, Encoders.bean(COCI.class))
+                .write()
+                .mode(SaveMode.Overwrite)
+                .option("compression", "gzip")
+                .json(outputPath + inputFile);
+        }
+
+    }
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java
new file mode 100644
index 000000000..ce087ce22
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java
@@ -0,0 +1,42 @@
+
+package eu.dnetlib.dhp.actionmanager.opencitations.model;
+
+import java.io.Serializable;
+
+import com.opencsv.bean.CsvBindByPosition;
+
+/**
+ * Serializable bean with the three string properties {@code oci}, {@code citing} and {@code cited}.
+ */
+public class COCI implements Serializable {
+    private String oci;
+
+    private String citing;
+
+    private String cited;
+
+    public String getOci() {
+        return oci;
+    }
+
+    public void setOci(String oci) {
+        this.oci = oci;
+    }
+
+    public String getCiting() {
+        return citing;
+    }
+
+    public void setCiting(String citing) {
+        this.citing = citing;
+    }
+
+    public String getCited() {
+        return cited;
+    }
+
+    public void setCited(String cited) {
+        this.cited = cited;
+    }
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala deleted file mode 100644 index 7a87861db..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala +++ /dev/null @@ -1,69 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.scholix - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} -import org.apache.spark.SparkConf -import org.apache.spark.sql._ -import org.slf4j.{Logger, LoggerFactory} - -import scala.io.Source - -object SparkCreateActionset { - - def main(args: Array[String]): Unit = { - val log: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/generate_actionset.json")).mkString) - parser.parseArgument(args) - - - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - val sourcePath = parser.get("sourcePath") - log.info(s"sourcePath -> $sourcePath") - - val targetPath = parser.get("targetPath") - log.info(s"targetPath -> $targetPath") - - val workingDirFolder = parser.get("workingDirFolder") - log.info(s"workingDirFolder -> $workingDirFolder") - - implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val resultEncoders: Encoder[Result] = Encoders.kryo[Result] - implicit val
relationEncoders: Encoder[Relation] = Encoders.kryo[Relation] - - import spark.implicits._ - - val relation = spark.read.load(s"$sourcePath/relation").as[Relation] - - relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .flatMap(r => List(r.getSource, r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") - - - val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] - - log.info("extract source and target Identifier involved in relations") - - - log.info("save relation filtered") - - relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") - - log.info("saving entities") - - val entities: Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) - - entities - .joinWith(idRelation, entities("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala deleted file mode 100644 index 1df7ea3fb..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala +++ /dev/null @@ -1,86 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.scholix - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation} 
-import org.apache.hadoop.io.Text -import org.apache.hadoop.io.compress.GzipCodec -import org.apache.hadoop.mapred.SequenceFileOutputFormat -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Encoder, Encoders, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - -import scala.io.Source - -object SparkSaveActionSet { - - - def toActionSet(item: Oaf): (String, String) = { - val mapper = new ObjectMapper() - - item match { - case dataset: OafDataset => - val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset] - a.setClazz(classOf[OafDataset]) - a.setPayload(dataset) - (dataset.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case publication: Publication => - val a: AtomicAction[Publication] = new AtomicAction[Publication] - a.setClazz(classOf[Publication]) - a.setPayload(publication) - (publication.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case software: Software => - val a: AtomicAction[Software] = new AtomicAction[Software] - a.setClazz(classOf[Software]) - a.setPayload(software) - (software.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case orp: OtherResearchProduct => - val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct] - a.setClazz(classOf[OtherResearchProduct]) - a.setPayload(orp) - (orp.getClass.getCanonicalName, mapper.writeValueAsString(a)) - - case relation: Relation => - val a: AtomicAction[Relation] = new AtomicAction[Relation] - a.setClazz(classOf[Relation]) - a.setPayload(relation) - (relation.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case _ => - null - } - - } - - def main(args: Array[String]): Unit = { - val log: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/save_actionset.json")).mkString) - parser.parseArgument(args) - - - val spark: SparkSession = - SparkSession - .builder() 
- .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - val sourcePath = parser.get("sourcePath") - log.info(s"sourcePath -> $sourcePath") - - val targetPath = parser.get("targetPath") - log.info(s"targetPath -> $targetPath") - - implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING) - - spark.read.load(sourcePath).as[Oaf] - .map(o => toActionSet(o)) - .filter(o => o != null) - .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec]) - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala deleted file mode 100644 index 6ce4920ed..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ /dev/null @@ -1,608 +0,0 @@ -package eu.dnetlib.dhp.datacite - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} -import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset} -import eu.dnetlib.dhp.utils.DHPUtils -import org.apache.commons.lang3.StringUtils -import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JField, JObject, JString} -import org.json4s.jackson.JsonMethods.parse - -import java.nio.charset.CodingErrorAction -import java.text.SimpleDateFormat -import java.time.LocalDate -import 
java.time.chrono.ThaiBuddhistDate -import java.time.format.DateTimeFormatter -import java.util.regex.Pattern -import java.util.{Date, Locale} -import scala.collection.JavaConverters._ -import scala.io.{Codec, Source} -import scala.language.postfixOps - -case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {} - -case class RelatedIdentifierType(relationType: String, relatedIdentifier: String, relatedIdentifierType: String) {} - -case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {} - -case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {} - -case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {} - -case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {} - -case class DescriptionType(descriptionType: Option[String], description: Option[String]) {} - -case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {} - -case class DateType(date: Option[String], dateType: Option[String]) {} - -case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {} - -object DataciteToOAFTransformation { - - val REL_TYPE_VALUE:String = "resultResult" - val DATE_RELATION_KEY = "RelationDate" - - val subRelTypeMapping: Map[String,(String,String)] = Map( - "References" ->("IsReferencedBy","relationship"), - "IsSupplementTo" ->("IsSupplementedBy","supplement"), - "IsPartOf" ->("HasPart","part"), - "HasPart" ->("IsPartOf","part"), - "IsVersionOf" ->("HasVersion","version"), - "HasVersion" ->("IsVersionOf","version"), - "IsIdenticalTo" 
->("IsIdenticalTo","relationship"), - "IsPreviousVersionOf" ->("IsNewVersionOf","version"), - "IsContinuedBy" ->("Continues","relationship"), - "Continues" ->("IsContinuedBy","relationship"), - "IsNewVersionOf" ->("IsPreviousVersionOf","version"), - "IsSupplementedBy" ->("IsSupplementTo","supplement"), - "IsDocumentedBy" ->("Documents","relationship"), - "IsSourceOf" ->("IsDerivedFrom","relationship"), - "Cites" ->("IsCitedBy","citation"), - "IsCitedBy" ->("Cites","citation"), - "IsDerivedFrom" ->("IsSourceOf","relationship"), - "IsVariantFormOf" ->("IsDerivedFrom","version"), - "IsReferencedBy" ->("References","relationship"), - "IsObsoletedBy" ->("IsNewVersionOf","version"), - "Reviews" ->("IsReviewedBy","review"), - "Documents" ->("IsDocumentedBy","relationship"), - "IsCompiledBy" ->("Compiles","relationship"), - "Compiles" ->("IsCompiledBy","relationship"), - "IsReviewedBy" ->("Reviews","review") - ) - - implicit val codec: Codec = Codec("UTF-8") - codec.onMalformedInput(CodingErrorAction.REPLACE) - codec.onUnmappableCharacter(CodingErrorAction.REPLACE) - - val DOI_CLASS = "doi" - val SUBJ_CLASS = "keywords" - - - val j_filter: List[String] = { - val s = Source.fromInputStream(getClass.getResourceAsStream("datacite_filter")).mkString - s.lines.toList - } - - val mapper = new ObjectMapper() - val unknown_repository: HostedByMapType = HostedByMapType(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID, ModelConstants.UNKNOWN_REPOSITORY.getValue, ModelConstants.UNKNOWN_REPOSITORY.getValue, Some(1.0F)) - - val dataInfo: DataInfo = generateDataInfo("0.9") - val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, "Datacite") - - val hostedByMap: Map[String, HostedByMapType] = { - val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: org.json4s.JValue = parse(s) - json.extract[Map[String, HostedByMapType]] - } 
- - val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH) - val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) - - val funder_regex: List[(Pattern, String)] = List( - (Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"), - (Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::") - - ) - - val Date_regex: List[Pattern] = List( - //Y-M-D - Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE), - //M-D-Y - Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE), - //D-M-Y - Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE), - //Y - Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE) - ) - - - def filter_json(json: String): Boolean = { - j_filter.exists(f => json.contains(f)) - } - - def toActionSet(item: Oaf): (String, String) = { - val mapper = new ObjectMapper() - - item match { - case dataset: OafDataset => - val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset] - a.setClazz(classOf[OafDataset]) - a.setPayload(dataset) - (dataset.getClass.getCanonicalName, 
mapper.writeValueAsString(a)) - case publication: Publication => - val a: AtomicAction[Publication] = new AtomicAction[Publication] - a.setClazz(classOf[Publication]) - a.setPayload(publication) - (publication.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case software: Software => - val a: AtomicAction[Software] = new AtomicAction[Software] - a.setClazz(classOf[Software]) - a.setPayload(software) - (software.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case orp: OtherResearchProduct => - val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct] - a.setClazz(classOf[OtherResearchProduct]) - a.setPayload(orp) - (orp.getClass.getCanonicalName, mapper.writeValueAsString(a)) - - case relation: Relation => - val a: AtomicAction[Relation] = new AtomicAction[Relation] - a.setClazz(classOf[Relation]) - a.setPayload(relation) - (relation.getClass.getCanonicalName, mapper.writeValueAsString(a)) - case _ => - null - } - - } - - - def embargo_end(embargo_end_date: String): Boolean = { - val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]")) - val td = LocalDate.now() - td.isAfter(dt) - } - - - def extract_date(input: String): Option[String] = { - val d = Date_regex.map(pattern => { - val matcher = pattern.matcher(input) - if (matcher.find()) - matcher.group(0) - else - null - } - ).find(s => s != null) - - if (d.isDefined) { - val a_date = if (d.get.length == 4) s"01-01-${d.get}" else d.get - try { - return Some(LocalDate.parse(a_date, df_en).toString) - } catch { - case _: Throwable => try { - return Some(LocalDate.parse(a_date, df_it).toString) - } catch { - case _: Throwable => - return None - } - } - } - d - } - - def fix_thai_date(input:String, format:String) :String = { - try { - val a_date = LocalDate.parse(input,DateTimeFormatter.ofPattern(format)) - val d = ThaiBuddhistDate.of(a_date.getYear, a_date.getMonth.getValue, a_date.getDayOfMonth) - LocalDate.from(d).toString - } catch { - 
case _: Throwable => "" - } - } - def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): (Qualifier, Qualifier) = { - if (resourceType != null && resourceType.nonEmpty) { - val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) - if (typeQualifier != null) - return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid)) - } - if (schemaOrg != null && schemaOrg.nonEmpty) { - val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, schemaOrg) - if (typeQualifier != null) - return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid)) - - } - if (resourceTypeGeneral != null && resourceTypeGeneral.nonEmpty) { - val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceTypeGeneral) - if (typeQualifier != null) - return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid)) - - } - null - } - - - def getResult(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): Result = { - val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) - if (typeQualifiers == null) - return null - val i = new Instance - i.setInstancetype(typeQualifiers._1) - typeQualifiers._2.getClassname match { - case "dataset" => - val r = new OafDataset - r.setInstance(List(i).asJava) - return r - case "publication" => - val r = new Publication - r.setInstance(List(i).asJava) - return r - case "software" => - val r = new Software - r.setInstance(List(i).asJava) - return r - case "other" => - val r = new OtherResearchProduct - r.setInstance(List(i).asJava) - return r - } - null - } - - - def 
available_date(input: String): Boolean = { - - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: org.json4s.JValue = parse(input) - val l: List[String] = for { - JObject(dates) <- json \\ "dates" - JField("dateType", JString(dateTypes)) <- dates - } yield dateTypes - - l.exists(p => p.equalsIgnoreCase("available")) - - } - - - /** - * As describe in ticket #6377 - * when the result come from figshare we need to remove subject - * and set Access rights OPEN. - * - * @param r - */ - def fix_figshare(r: Result): Unit = { - - if (r.getInstance() != null) { - val hosted_by_figshare = r.getInstance().asScala.exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue)) - if (hosted_by_figshare) { - r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT())) - val l: List[StructuredProperty] = List() - r.setSubject(l.asJava) - } - } - - - } - - - def createDNetTargetIdentifier(pid: String, pidType: String, idPrefix: String): String = { - val f_part = s"$idPrefix|${pidType.toLowerCase}".padTo(15, '_') - s"$f_part::${IdentifierFactory.md5(pid.toLowerCase)}" - } - - def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = { - OafMapperUtils.structuredProperty(dt, q, null) - } - - def generateRelation(sourceId: String, targetId: String, relClass: String, cf: KeyValue, di: DataInfo): Relation = { - - val r = new Relation - r.setSource(sourceId) - r.setTarget(targetId) - r.setRelType(ModelConstants.RESULT_PROJECT) - r.setRelClass(relClass) - r.setSubRelType(ModelConstants.OUTCOME) - r.setCollectedfrom(List(cf).asJava) - r.setDataInfo(di) - r - - - } - - def get_projectRelation(awardUri: String, sourceId: String): List[Relation] = { - val match_pattern = funder_regex.find(s => s._1.matcher(awardUri).find()) - - if (match_pattern.isDefined) { - val m = match_pattern.get._1 - val p = match_pattern.get._2 - val grantId = m.matcher(awardUri).replaceAll("$2") - val targetId 
= s"$p${DHPUtils.md5(grantId)}" - List( - generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo) -// REMOVED INVERSE RELATION since there is a specific method that should generate later -// generateRelation(targetId, sourceId, "produces", DATACITE_COLLECTED_FROM, dataInfo) - ) - } - else - List() - - } - - - def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup, exportLinks: Boolean): List[Oaf] = { - if (filter_json(input)) - return List() - - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json = parse(input) - - val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null) - val resourceTypeGeneral = (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null) - val schemaOrg = (json \ "attributes" \ "types" \ "schemaOrg").extractOrElse[String](null) - - val doi = (json \ "attributes" \ "doi").extract[String] - if (doi.isEmpty) - return List() - - //Mapping type based on vocabularies dnet:publication_resource and dnet:result_typologies - val result = getResult(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) - if (result == null) - return List() - - - val doi_q = OafMapperUtils.qualifier("doi", "doi", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES) - val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo) - result.setPid(List(pid).asJava) - result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true)) - result.setOriginalId(List(doi).asJava) - - val d = new Date(dateOfCollection * 1000) - val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) - - - result.setDateofcollection(ISO8601FORMAT.format(d)) - result.setDateoftransformation(ISO8601FORMAT.format(d)) - result.setDataInfo(dataInfo) - - val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List()) - - - val authors = creators.zipWithIndex.map { case (c, 
idx) => - val a = new Author - a.setFullname(c.name.orNull) - a.setName(c.givenName.orNull) - a.setSurname(c.familyName.orNull) - if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) { - a.setPid(c.nameIdentifiers.get.map(ni => { - val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(ModelConstants.DNET_PID_TYPES, ni.nameIdentifierScheme.get.toLowerCase()) else null - if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) { - OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo) - } - else - null - - } - ) - .asJava) - } - if (c.affiliation.isDefined) - a.setAffiliation(c.affiliation.get.filter(af => af.nonEmpty).map(af => OafMapperUtils.field(af, dataInfo)).asJava) - a.setRank(idx + 1) - a - } - - - val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List()) - - result.setTitle(titles.filter(t => t.title.nonEmpty).map(t => { - if (t.titleType.isEmpty) { - OafMapperUtils.structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null) - } else { - OafMapperUtils.structuredProperty(t.title.get, t.titleType.get, t.titleType.get, ModelConstants.DNET_DATACITE_TITLE, ModelConstants.DNET_DATACITE_TITLE, null) - } - }).asJava) - - if (authors == null || authors.isEmpty || !authors.exists(a => a != null)) - return List() - result.setAuthor(authors.asJava) - - val dates = (json \\ "dates").extract[List[DateType]] - val publication_year = (json \\ "publicationYear").extractOrElse[String](null) - - val i_date = dates - .filter(d => d.date.isDefined && d.dateType.isDefined) - .find(d => d.dateType.get.equalsIgnoreCase("issued")) - .map(d => extract_date(d.date.get)) - val a_date: Option[String] = dates - .filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available")) - .map(d => extract_date(d.date.get)) - .find(d => d != null && d.isDefined) - .map(d => d.get) - - if (a_date.isDefined) { - 
if(doi.startsWith("10.14457")) - result.setEmbargoenddate(OafMapperUtils.field(fix_thai_date(a_date.get,"[yyyy-MM-dd]"), null)) - else - result.setEmbargoenddate(OafMapperUtils.field(a_date.get, null)) - } - if (i_date.isDefined && i_date.get.isDefined) { - if(doi.startsWith("10.14457")) { - result.setDateofacceptance(OafMapperUtils.field(fix_thai_date(i_date.get.get,"[yyyy-MM-dd]"), null)) - result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(fix_thai_date(i_date.get.get,"[yyyy-MM-dd]"), null)) - } - else { - result.setDateofacceptance(OafMapperUtils.field(i_date.get.get, null)) - result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(i_date.get.get, null)) - } - } - else if (publication_year != null) { - if(doi.startsWith("10.14457")) { - result.setDateofacceptance(OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year","[dd-MM-yyyy]"), null)) - result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year","[dd-MM-yyyy]"), null)) - - } else { - result.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) - result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) - } - } - - - result.setRelevantdate(dates.filter(d => d.date.isDefined && d.dateType.isDefined) - .map(d => (extract_date(d.date.get), d.dateType.get)) - .filter(d => d._1.isDefined) - .map(d => (d._1.get, vocabularies.getTermAsQualifier(ModelConstants.DNET_DATACITE_DATE, d._2.toLowerCase()))) - .filter(d => d._2 != null) - .map(d => generateOAFDate(d._1, d._2)).asJava) - - val subjects = (json \\ "subjects").extract[List[SubjectType]] - - result.setSubject(subjects.filter(s => s.subject.nonEmpty) - .map(s => - OafMapperUtils.structuredProperty(s.subject.get, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, null) - ).asJava) - - - result.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) - - val 
descriptions = (json \\ "descriptions").extract[List[DescriptionType]] - - result.setDescription( - descriptions - .filter(d => d.description.isDefined). - map(d => - OafMapperUtils.field(d.description.get, null) - ).filter(s => s != null).asJava) - - - val publisher = (json \\ "publisher").extractOrElse[String](null) - if (publisher != null) - result.setPublisher(OafMapperUtils.field(publisher, null)) - - - val language: String = (json \\ "language").extractOrElse[String](null) - - if (language != null) - result.setLanguage(vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, language)) - - - val instance = result.getInstance().get(0) - - val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String] - - val accessRights: List[String] = for { - JObject(rightsList) <- json \\ "rightsList" - JField("rightsUri", JString(rightsUri)) <- rightsList - } yield rightsUri - - val aRights: Option[AccessRight] = accessRights.map(r => { - vocabularies.getSynonymAsQualifier(ModelConstants.DNET_ACCESS_MODES, r) - }).find(q => q != null).map(q => { - val a = new AccessRight - a.setClassid(q.getClassid) - a.setClassname(q.getClassname) - a.setSchemeid(q.getSchemeid) - a.setSchemename(q.getSchemename) - a - }) - - - val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) - - if (client.isDefined) { - val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository) - instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name)) - instance.setCollectedfrom(DATACITE_COLLECTED_FROM) - instance.setUrl(List(s"https://dx.doi.org/$doi").asJava) - instance.setAccessright(access_rights_qualifier) - instance.setPid(result.getPid) - val license = accessRights - .find(r => r.startsWith("http") && 
r.matches(".*(/licenses|/publicdomain|unlicense\\.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*")) - if (license.isDefined) - instance.setLicense(OafMapperUtils.field(license.get, null)) - } - - val awardUris: List[String] = for { - JObject(fundingReferences) <- json \\ "fundingReferences" - JField("awardUri", JString(awardUri)) <- fundingReferences - } yield awardUri - - result.setId(IdentifierFactory.createIdentifier(result)) - var relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null) - - fix_figshare(result) - - if (result.getId == null) - return List() - - if (exportLinks) { - val rels: List[RelatedIdentifierType] = for { - JObject(relIdentifier) <- json \\ "relatedIdentifiers" - JField("relationType", JString(relationType)) <- relIdentifier - JField("relatedIdentifierType", JString(relatedIdentifierType)) <- relIdentifier - JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier - } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType) - - relations = relations ::: generateRelations(rels,result.getId, if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null) - } - if (relations != null && relations.nonEmpty) { - List(result) ::: relations - } - else - List(result) - } - - private def generateRelations(rels: List[RelatedIdentifierType], id:String, date:String):List[Relation] = { - rels - .filter(r => - subRelTypeMapping.contains(r.relationType) && ( - r.relatedIdentifierType.equalsIgnoreCase("doi") || - r.relatedIdentifierType.equalsIgnoreCase("pmid") || - r.relatedIdentifierType.equalsIgnoreCase("arxiv")) - ) - .map(r => { - val rel = new Relation - rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) - rel.setDataInfo(dataInfo) - - val subRelType = subRelTypeMapping(r.relationType)._2 - rel.setRelType(REL_TYPE_VALUE) - rel.setSubRelType(subRelType) - rel.setRelClass(r.relationType) - - val 
dateProps:KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) - - rel.setProperties(List(dateProps).asJava) - - rel.setSource(id) - rel.setTarget(DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier,r.relatedIdentifierType)) - rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) - rel.getCollectedfrom.asScala.map(c => c.getValue).toList - rel - }).toList - } - - def generateDataInfo(trust: String): DataInfo = { - val di = new DataInfo - di.setDeletedbyinference(false) - di.setInferred(false) - di.setInvisible(false) - di.setTrust(trust) - di.setProvenanceaction(ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER) - di - } - - def generateDSId(input: String): String = { - val b = StringUtils.substringBefore(input, "::") - val a = StringUtils.substringAfter(input, "::") - s"10|$b::${DHPUtils.md5(a)}" - } - - -} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala deleted file mode 100644 index a63627d1c..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ /dev/null @@ -1,64 +0,0 @@ -package eu.dnetlib.dhp.datacite - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations -import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH -import eu.dnetlib.dhp.common.Constants.MDSTORE_SIZE_PATH -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} -import eu.dnetlib.dhp.schema.oaf.Oaf -import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile -import eu.dnetlib.dhp.utils.ISLookupClientFactory -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} -import 
org.slf4j.{Logger, LoggerFactory} - -import scala.io.Source - -object GenerateDataciteDatasetSpark { - - val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass) - - def main(args: Array[String]): Unit = { - val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json")).mkString) - parser.parseArgument(args) - val master = parser.get("master") - val sourcePath = parser.get("sourcePath") - val exportLinks = "true".equalsIgnoreCase(parser.get("exportLinks")) - val isLookupUrl: String = parser.get("isLookupUrl") - log.info("isLookupUrl: {}", isLookupUrl) - - val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) - val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) - val spark: SparkSession = SparkSession.builder().config(conf) - .appName(GenerateDataciteDatasetSpark.getClass.getSimpleName) - .master(master) - .getOrCreate() - - import spark.implicits._ - - implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord] - - implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - - val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") - val mapper = new ObjectMapper() - val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) - val outputBasePath = cleanedMdStoreVersion.getHdfsPath - - log.info("outputBasePath: {}", outputBasePath) - val targetPath = s"$outputBasePath/$MDSTORE_DATA_PATH" - - spark.read.load(sourcePath).as[DataciteType] - .filter(d => d.isActive) - .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) - .filter(d => d != null) - .flatMap(i => fixRelations(i)).filter(i => i != null) - .write.mode(SaveMode.Overwrite).save(targetPath) - - val total_items = spark.read.load(targetPath).as[Oaf].count() - writeHdfsFile(spark.sparkContext.hadoopConfiguration, 
s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala deleted file mode 100644 index 70dcc0184..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ /dev/null @@ -1,394 +0,0 @@ -package eu.dnetlib.dhp.sx.bio - -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils} -import eu.dnetlib.dhp.schema.oaf._ -import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JField, JObject, JString} -import org.json4s.jackson.JsonMethods.{compact, parse, render} -import collection.JavaConverters._ -object BioDBToOAF { - - case class EBILinkItem(id: Long, links: String) {} - - case class EBILinks(relType: String, date: String, title: String, pmid: String, targetPid: String, targetPidType: String, targetUrl: String) {} - - case class UniprotDate(date: String, date_info: String) {} - - case class ScholixResolved(pid: String, pidType: String, typology: String, tilte: List[String], datasource: List[String], date: List[String], authors: List[String]) {} - - val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") - val SUBJ_CLASS = "Keywords" - - val DATE_RELATION_KEY = "RelationDate" - - val resolvedURL: Map[String, String] = Map( - "genbank" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "ncbi-wgs" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "ncbi-p" -> "https://www.ncbi.nlm.nih.gov/protein/", - "ena" -> "https://www.ebi.ac.uk/ena/browser/view/", - "clinicaltrials.gov" -> "https://clinicaltrials.gov/ct2/show/", - "onim" -> "https://omim.org/entry/", - "refseq" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "geo" -> 
"https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=" - ) - - - val collectedFromMap: Map[String, KeyValue] = { - val PDBCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|opendoar____::d1c373ab1570cfb9a7dbb53c186b37a2", "Protein Data Bank") - val enaCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|re3data_____::c2a591f440598b63d854556beaf01591", "European Nucleotide Archive") - val ncbiCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|re3data_____::7d4f90870fe1e493232c9e86c43ae6f6", "NCBI Nucleotide") - val UNIPROTCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|re3data_____::296e1abaf1302897a6838d3588cd0310", "UniProtKB/Swiss-Prot") - val ElsevierCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|openaire____::8f87e10869299a5fe80b315695296b88", "Elsevier") - val springerNatureCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|openaire____::6e380d9cf51138baec8480f5a0ce3a2e", "Springer Nature") - val EBICollectedFrom: KeyValue = OafMapperUtils.keyValue("10|opendoar____::83e60e09c222f206c725385f53d7e567c", "EMBL-EBIs Protein Data Bank in Europe (PDBe)") - val pubmedCollectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") - - UNIPROTCollectedFrom.setDataInfo(DATA_INFO) - PDBCollectedFrom.setDataInfo(DATA_INFO) - ElsevierCollectedFrom.setDataInfo(DATA_INFO) - EBICollectedFrom.setDataInfo(DATA_INFO) - pubmedCollectedFrom.setDataInfo(DATA_INFO) - enaCollectedFrom.setDataInfo(DATA_INFO) - ncbiCollectedFrom.setDataInfo(DATA_INFO) - springerNatureCollectedFrom.setDataInfo(DATA_INFO) - - Map( - "uniprot" -> UNIPROTCollectedFrom, - "pdb" -> PDBCollectedFrom, - "elsevier" -> ElsevierCollectedFrom, - "ebi" -> EBICollectedFrom, - "Springer Nature" -> springerNatureCollectedFrom, - "NCBI Nucleotide" -> ncbiCollectedFrom, - "European Nucleotide Archive" -> enaCollectedFrom, - "Europe PMC" -> pubmedCollectedFrom - ) - } - - def crossrefLinksToOaf(input: String): Oaf = { - implicit lazy val formats: 
DefaultFormats.type = org.json4s.DefaultFormats - lazy val json = parse(input) - val source_pid = (json \ "Source" \ "Identifier" \ "ID").extract[String].toLowerCase - val source_pid_type = (json \ "Source" \ "Identifier" \ "IDScheme").extract[String].toLowerCase - - val target_pid = (json \ "Target" \ "Identifier" \ "ID").extract[String].toLowerCase - val target_pid_type = (json \ "Target" \ "Identifier" \ "IDScheme").extract[String].toLowerCase - - val relation_semantic = (json \ "RelationshipType" \ "Name").extract[String] - - val date = GraphCleaningFunctions.cleanDate((json \ "LinkedPublicationDate").extract[String]) - - createRelation(target_pid, target_pid_type, generate_unresolved_id(source_pid, source_pid_type), collectedFromMap("elsevier"), "relationship", relation_semantic, date) - - } - - - def scholixResolvedToOAF(input: ScholixResolved): Oaf = { - - val d = new Dataset - - d.setPid( - List( - OafMapperUtils.structuredProperty(input.pid.toLowerCase, input.pidType.toLowerCase, input.pidType.toLowerCase, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, DATA_INFO) - ).asJava - ) - - d.setDataInfo(DATA_INFO) - - val nsPrefix = input.pidType.toLowerCase.padTo(12, '_') - d.setId(OafMapperUtils.createOpenaireId(50, s"$nsPrefix::${input.pid.toLowerCase}", true)) - - if (input.tilte != null && input.tilte.nonEmpty) - d.setTitle(List(OafMapperUtils.structuredProperty(input.tilte.head, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO)).asJava) - - d.setOriginalId(List(input.pid).asJava) - val i = new Instance - - i.setPid(d.getPid) - - if (resolvedURL.contains(input.pidType)) { - i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava) - } - - if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) - i.setInstancetype(OafMapperUtils.qualifier("0037", "Clinical Trial", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) - else - i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", 
ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) - - if (input.datasource == null || input.datasource.isEmpty) - return null - - val ds = input.datasource.head - d.setCollectedfrom(List(collectedFromMap(ds)).asJava) - i.setCollectedfrom(collectedFromMap(ds)) - d.setInstance(List(i).asJava) - - if (input.authors != null && input.authors.nonEmpty) { - val authors = input.authors.map(a => { - val authorOAF = new Author - authorOAF.setFullname(a) - authorOAF - }) - d.setAuthor(authors.asJava) - } - if (input.date != null && input.date.nonEmpty) { - val dt = input.date.head - i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) - d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) - } - d - } - - - def uniprotToOAF(input: String): List[Oaf] = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json = parse(input) - val pid = (json \ "pid").extract[String] - - val d = new Dataset - - d.setPid( - List( - OafMapperUtils.structuredProperty(pid, "uniprot", "uniprot", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, DATA_INFO) - ).asJava - ) - - d.setDataInfo(DATA_INFO) - d.setId(OafMapperUtils.createOpenaireId(50, s"uniprot_____::$pid", true)) - d.setCollectedfrom(List(collectedFromMap("uniprot")).asJava) - - val title: String = (json \ "title").extractOrElse[String](null) - - if (title != null) - d.setTitle(List(OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO)).asJava) - - d.setOriginalId(List(pid).asJava) - val i = new Instance - - i.setPid(d.getPid) - i.setUrl(List(s"https://www.uniprot.org/uniprot/$pid").asJava) - i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) - - i.setCollectedfrom(collectedFromMap("uniprot")) - d.setInstance(List(i).asJava) - - val dates: 
List[UniprotDate] = for { - JObject(dateOBJ) <- json \ "dates" - JField("date", JString(date)) <- dateOBJ - JField("date_info", JString(date_info)) <- dateOBJ - } yield UniprotDate(GraphCleaningFunctions.cleanDate(date), date_info) - - val subjects: List[String] = (json \\ "subjects").extractOrElse[List[String]](null) - - - if (subjects != null) { - d.setSubject( - subjects.map(s => - OafMapperUtils.structuredProperty(s, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, null) - ).asJava) - } - var i_date: Option[UniprotDate] = None - - if (dates.nonEmpty) { - i_date = dates.find(d => d.date_info.contains("entry version")) - if (i_date.isDefined) { - i.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) - d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) - } - val relevant_dates: List[StructuredProperty] = dates.filter(d => !d.date_info.contains("entry version")) - .map(date => OafMapperUtils.structuredProperty(date.date, ModelConstants.UNKNOWN, ModelConstants.UNKNOWN, ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO)) - if (relevant_dates != null && relevant_dates.nonEmpty) - d.setRelevantdate(relevant_dates.asJava) - d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) - } - - - val references_pmid: List[String] = for { - JObject(reference) <- json \ "references" - JField("PubMed", JString(pid)) <- reference - } yield pid - - val references_doi: List[String] = for { - JObject(reference) <- json \ "references" - JField(" DOI", JString(pid)) <- reference - } yield pid - - - if (references_pmid != null && references_pmid.nonEmpty) { - val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) - rel.getCollectedfrom - List(d, rel) - } - else if (references_doi != null && 
references_doi.nonEmpty) { - val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) - List(d, rel) - } - else - List(d) - } - - - def generate_unresolved_id(pid: String, pidType: String): String = { - s"unresolved::$pid::$pidType" - } - - - def createRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, subRelType: String, relClass: String, date: String): Relation = { - - val rel = new Relation - rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava) - rel.setDataInfo(DATA_INFO) - - rel.setRelType(ModelConstants.RESULT_RESULT) - rel.setSubRelType(subRelType) - rel.setRelClass(relClass) - - rel.setSource(sourceId) - rel.setTarget(s"unresolved::$pid::$pidType") - - - val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) - - rel.setProperties(List(dateProps).asJava) - - rel.getTarget.startsWith("unresolved") - rel.setCollectedfrom(List(collectedFrom).asJava) - rel - - } - - - def createSupplementaryRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, date: String): Relation = { - createRelation(pid, pidType, sourceId, collectedFrom, ModelConstants.SUPPLEMENT, ModelConstants.IS_SUPPLEMENT_TO, date) - } - - - def pdbTOOaf(input: String): List[Oaf] = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json = parse(input) - val pdb = (json \ "pdb").extract[String].toLowerCase - - if (pdb.isEmpty) - return List() - - val d = new Dataset - - d.setPid( - List( - OafMapperUtils.structuredProperty(pdb, "pdb", "Protein Data Bank Identifier", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, DATA_INFO) - ).asJava - ) - - d.setCollectedfrom(List(collectedFromMap("pdb")).asJava) - d.setDataInfo(DATA_INFO) - d.setId(OafMapperUtils.createOpenaireId(50, s"pdb_________::$pdb", true)) - 
d.setOriginalId(List(pdb).asJava) - - val title = (json \ "title").extractOrElse[String](null) - - if (title == null) - return List() - d.setTitle(List(OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO)).asJava) - - val authors: List[String] = (json \ "authors").extractOrElse[List[String]](null) - - if (authors != null) { - val convertedAuthors = authors.zipWithIndex.map { a => - - val res = new Author - res.setFullname(a._1) - res.setRank(a._2 + 1) - res - } - - d.setAuthor(convertedAuthors.asJava) - } - - val i = new Instance - - i.setPid(d.getPid) - i.setUrl(List(s"https://www.rcsb.org/structure/$pdb").asJava) - i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) - - i.setCollectedfrom(collectedFromMap("pdb")) - d.setInstance(List(i).asJava) - - val pmid = (json \ "pmid").extractOrElse[String](null) - - if (pmid != null) - List(d, createSupplementaryRelation(pmid, "pmid", d.getId, collectedFromMap("pdb"), null)) - else - List(d) - } - - - def extractEBILinksFromDump(input: String): EBILinkItem = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json = parse(input) - - val pmid = (json \ "publication" \ "pmid").extract[String] - val links = (json \ "links").extract[JObject] - EBILinkItem(pmid.toLong, compact(render(links))) - } - - - def EBITargetLinksFilter(input: EBILinks): Boolean = { - - input.targetPidType.equalsIgnoreCase("ena") || input.targetPidType.equalsIgnoreCase("pdb") || input.targetPidType.equalsIgnoreCase("uniprot") - - } - - - def parse_ebi_links(input: String): List[EBILinks] = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json = parse(input) - val pmid = (json \ "request" \ "id").extract[String] - for { - JObject(link) <- json \\ "Link" - JField("Target", JObject(target)) <- link - JField("RelationshipType", JObject(relType)) <- 
link - JField("Name", JString(relation)) <- relType - JField("PublicationDate", JString(publicationDate)) <- link - JField("Title", JString(title)) <- target - JField("Identifier", JObject(identifier)) <- target - JField("IDScheme", JString(idScheme)) <- identifier - JField("IDURL", JString(idUrl)) <- identifier - JField("ID", JString(id)) <- identifier - - } yield EBILinks(relation, GraphCleaningFunctions.cleanDate(publicationDate), title, pmid, id, idScheme, idUrl) - } - - - def convertEBILinksToOaf(input: EBILinks): List[Oaf] = { - val d = new Dataset - d.setCollectedfrom(List(collectedFromMap("ebi")).asJava) - d.setDataInfo(DATA_INFO) - d.setTitle(List(OafMapperUtils.structuredProperty(input.title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO)).asJava) - - val nsPrefix = input.targetPidType.toLowerCase.padTo(12, '_') - - d.setId(OafMapperUtils.createOpenaireId(50, s"$nsPrefix::${input.targetPid.toLowerCase}", true)) - d.setOriginalId(List(input.targetPid.toLowerCase).asJava) - - - d.setPid( - List( - OafMapperUtils.structuredProperty(input.targetPid.toLowerCase, input.targetPidType.toLowerCase, "Protein Data Bank Identifier", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, DATA_INFO) - ).asJava - ) - - val i = new Instance - - i.setPid(d.getPid) - i.setUrl(List(input.targetUrl).asJava) - i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) - - i.setCollectedfrom(collectedFromMap("ebi")) - d.setInstance(List(i).asJava) - i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) - d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) - - List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, GraphCleaningFunctions.cleanDate(input.date))) - } -} diff --git 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala deleted file mode 100644 index c6d5fdf74..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala +++ /dev/null @@ -1,146 +0,0 @@ -package eu.dnetlib.dhp.sx.bio.pubmed - -import scala.xml.MetaData -import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader} - - -/** - * - * @param xml - */ -class PMParser(xml:XMLEventReader) extends Iterator[PMArticle] { - - var currentArticle:PMArticle = generateNextArticle() - - override def hasNext: Boolean = currentArticle!= null - - override def next(): PMArticle = { - val tmp = currentArticle - currentArticle = generateNextArticle() - tmp - } - - def extractAttributes(attrs:MetaData, key:String):String = { - - val res = attrs.get(key) - if (res.isDefined) { - val s =res.get - if (s != null && s.nonEmpty) - s.head.text - else - null - } - else null - } - - - def validate_Date(year:String, month:String, day:String):String = { - try { - f"${year.toInt}-${month.toInt}%02d-${day.toInt}%02d" - - } catch { - case _: Throwable =>null - } - } - - def generateNextArticle():PMArticle = { - - - var currentSubject:PMSubject = null - var currentAuthor: PMAuthor = null - var currentJournal: PMJournal = null - var currentGrant: PMGrant = null - var currNode: String = null - var currentYear = "0" - var currentMonth = "01" - var currentDay = "01" - var currentArticleType:String = null - - - - - - - while (xml.hasNext) { - xml.next match { - case EvElemStart(_, label, attrs, _) => - currNode = label - - label match { - case "PubmedArticle" => currentArticle = new PMArticle - case "Author" => currentAuthor = new PMAuthor - case "Journal" => currentJournal = new PMJournal - case "Grant" => currentGrant = new PMGrant - case "PublicationType" | "DescriptorName" => - currentSubject = new PMSubject - 
currentSubject.setMeshId(extractAttributes(attrs, "UI")) - case "ArticleId" => currentArticleType = extractAttributes(attrs,"IdType") - case _ => - } - case EvElemEnd(_, label) => - label match { - case "PubmedArticle" => return currentArticle - case "Author" => currentArticle.getAuthors.add(currentAuthor) - case "Journal" => currentArticle.setJournal(currentJournal) - case "Grant" => currentArticle.getGrants.add(currentGrant) - case "PubMedPubDate" => if (currentArticle.getDate== null) - currentArticle.setDate(validate_Date(currentYear,currentMonth,currentDay)) - case "PubDate" => currentJournal.setDate(s"$currentYear-$currentMonth-$currentDay") - case "DescriptorName" => currentArticle.getSubjects.add(currentSubject) - case "PublicationType" =>currentArticle.getPublicationTypes.add(currentSubject) - case _ => - } - case EvText(text) => - if (currNode!= null && text.trim.nonEmpty) - currNode match { - case "ArticleTitle" => { - if (currentArticle.getTitle==null) - currentArticle.setTitle(text.trim) - else - currentArticle.setTitle(currentArticle.getTitle + text.trim) - } - case "AbstractText" => { - if (currentArticle.getDescription==null) - currentArticle.setDescription(text.trim) - else - currentArticle.setDescription(currentArticle.getDescription + text.trim) - } - case "PMID" => currentArticle.setPmid(text.trim) - case "ArticleId" => if ("doi".equalsIgnoreCase(currentArticleType)) currentArticle.setDoi(text.trim) - case "Language" => currentArticle.setLanguage(text.trim) - case "ISSN" => currentJournal.setIssn(text.trim) - case "GrantID" => currentGrant.setGrantID(text.trim) - case "Agency" => currentGrant.setAgency(text.trim) - case "Country" => if (currentGrant != null) currentGrant.setCountry(text.trim) - case "Year" => currentYear = text.trim - case "Month" => currentMonth = text.trim - case "Day" => currentDay = text.trim - case "Volume" => currentJournal.setVolume( text.trim) - case "Issue" => currentJournal.setIssue (text.trim) - case "PublicationType" 
| "DescriptorName" => currentSubject.setValue(text.trim) - case "LastName" => { - if (currentAuthor != null) - currentAuthor.setLastName(text.trim) - } - case "ForeName" => if (currentAuthor != null) - currentAuthor.setForeName(text.trim) - case "Title" => - if (currentJournal.getTitle==null) - currentJournal.setTitle(text.trim) - else - currentJournal.setTitle(currentJournal.getTitle + text.trim) - case _ => - - } - case _ => - } - - } - null - } -} - - - - - diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json deleted file mode 100644 index 31771a40a..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName": "issm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "when true will stop SparkSession after job execution", - "paramRequired": false - }, - { - "paramName": "ip", - "paramLongName": "inputPath", - "paramDescription": "the URL from where to get the programme file", - "paramRequired": true - }, - { - "paramName": "o", - "paramLongName": "outputPath", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - }, - { - "paramName": "rtn", - "paramLongName": "resultTableName", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - }, - { - "paramName": "bsp", - "paramLongName": "bipScorePath", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/config-default.xml similarity index 97% rename from 
dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/config-default.xml index e5ec3d0ae..d262cb6e0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/config-default.xml @@ -27,4 +27,4 @@ oozie.launcher.mapreduce.user.classpath.first true - \ No newline at end of file + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml index 5311a6614..a6fba853d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml @@ -1,9 +1,5 @@ - - inputPath - the input path of the resources to be extended - bipScorePath @@ -13,8 +9,61 @@ outputPath the path where to store the actionset + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename 
+ ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -30,14 +79,8 @@ - - - - - - - + yarn cluster @@ -54,113 +97,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --inputPath${inputPath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/publication - --bipScorePath${bipScorePath} - - - - - - - - yarn - cluster - Produces the atomic action with the bip finder scores for datasets - eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --inputPath${inputPath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dataset - --bipScorePath${bipScorePath} - - - - - - - - yarn - cluster - Produces the atomic action with the bip finder scores for orp - eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - 
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --inputPath${inputPath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/otherresearchproduct - --bipScorePath${bipScorePath} - - - - - - - - yarn - cluster - Produces the atomic action with the bip finder scores for software - eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --inputPath${inputPath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/software - --bipScorePath${bipScorePath} - - - - - - - - - - yarn - cluster - saves all the aa produced for the several types of results in the as output path - eu.dnetlib.dhp.actionmanager.bipfinder.CollectAndSave - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --inputPath${workingDir} + --inputPath${bipScorePath} --outputPath${outputPath} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json similarity index 60% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json index 050a25677..5a6a63774 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json @@ -19,15 +19,9 @@ "paramRequired": false }, { - "paramName": "hnn", - "paramLongName": "hdfsNameNode", - "paramDescription": "the path used to store the HostedByMap", - "paramRequired": true - }, - { - "paramName": "cfn", - "paramLongName": "classForName", - "paramDescription": "the path used to store the HostedByMap", - "paramRequired": true + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "the delimiter if different from the default one (,)", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml index d53504fe6..a80bf4fbd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml @@ -79,6 +79,7 @@ + @@ -106,17 +107,30 @@ - - eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetFOSData - --hdfsNameNode${nameNode} + + yarn + cluster + Gets Data 
from FOS csv file + eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetFOSSparkJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + --sourcePath${fosPath} --outputPath${workingDir}/input/fos - --classForNameeu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel - + + yarn @@ -142,6 +156,55 @@ + + + yarn + cluster + Gets Data from SDG csv file + eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetSDGSparkJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sdgPath} + --outputPath${workingDir}/input/sdg + + + + + + + + + yarn + cluster + Produces the unresolved from SDG! 
+ eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareSDGSparkJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/input/sdg + --outputPath${workingDir}/prepared + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json new file mode 100644 index 000000000..b57cb5d9a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json @@ -0,0 +1,37 @@ +[ + { + "paramName": "wp", + "paramLongName": "workingPath", + "paramDescription": "the zipped opencitations file", + "paramRequired": true + }, + + + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "the field delimiter used in the input file", + "paramRequired": false + }, + { + "paramName": "op", + "paramLongName": "outputPath", + "paramDescription": "the path where the output is stored", + "paramRequired": true + }, + { + "paramName": "if", + "paramLongName": "inputFile", + "paramDescription": "the input file to be read", + "paramRequired": true + } +] + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index d052791a3..aee2559ee 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -26,6 +26,7 @@ ${wf:conf('resumeFrom') eq 'DownloadDump'} ${wf:conf('resumeFrom') eq 'ExtractContent'} + ${wf:conf('resumeFrom') eq 'ReadContent'} @@ -60,6 +61,32 @@ --inputFile${inputFile} --workingPath${workingPath} + + + + + + + yarn + cluster + Produces the AS for OC + eu.dnetlib.dhp.actionmanager.opencitations.ReadCOCI + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --workingPath${workingPath}/COCI + --outputPath${workingPath}/COCI_JSON + --delimiter${delimiter} + --inputFile${inputFileCoci} + @@ -81,7 +108,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --inputPath${workingPath}/COCI + --inputPath${workingPath}/COCI_JSON --outputPath${outputPath} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py new file mode 100644 index 000000000..db0431aae --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py @@ -0,0 +1,63 @@ +from urllib.request import 
urlopen +import json + + +def retrieve_datacite_clients(base_url): + datacite_clients = {} + while base_url is not None: + with urlopen(base_url) as response: + print(f"requesting {base_url}") + response_content = response.read() + data = json.loads(response_content) + if 'data' in data and len(data['data'])>0: + for item in data['data']: + datacite_clients[item['id'].lower()]= item['attributes']['re3data'].lower().replace("https://doi.org/","") + base_url = data['links']['next'] + else: + base_url = None + return datacite_clients + + +def retrieve_r3data(start_url): + r3data_clients = {} + page_number = 1 + base_url = start_url + while base_url is not None: + with urlopen(base_url) as response: + print(f"requesting {base_url}") + response_content = response.read() + data = json.loads(response_content) + if 'data' in data and len(data['data'])>0: + for item in data['data']: + r3data_clients[item['id'].lower()]= dict( + openaire_id= "re3data_____::"+item['attributes']['re3dataId'].lower(), + official_name=item['attributes']['repositoryName'] + ) + page_number +=1 + base_url = f"{start_url}&page[number]={page_number}" + else: + base_url = None + return r3data_clients + + + + + + +base_url ="https://api.datacite.org/clients?query=re3data_id:*&page[size]=250" + +dc = retrieve_datacite_clients(base_url) +r3 = retrieve_r3data("https://api.datacite.org/re3data?page[size]=250") + +result = {} + +for item in dc: + res = dc[item].lower() + if res not in r3: + print(f"missing {res} for {item} in dictionary") + else: + result[item.upper()]= dict(openaire_id=r3[res]["openaire_id"],datacite_name=r3[res]["official_name"], official_name=r3[res]["official_name"] ) + + +with open('hostedBy_map.json', 'w', encoding='utf8') as json_file: + json.dump(result, json_file, ensure_ascii=False, indent=1) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json index ddc70bc53..947a9a255 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json @@ -1,1032 +1,1042 @@ { - "SND.QOG": { - "openaire_id": "re3data_____::r3d100012231", - "datacite_name": "Quality of Government Institute", - "official_name": "Quality of Government Institute's Data", - "similarity": 0.8985507246376812 - }, - "GESIS.CESSDA": { - "openaire_id": "re3data_____::r3d100010202", - "datacite_name": "CESSDA ERIC", - "official_name": "CESSDA ERIC" - }, - "BL.CRAN": { - "openaire_id": "re3data_____::r3d100012068", - "datacite_name": "Cranfield University", - "official_name": "Cranfield Online Research Data" - }, - "SUL.OPENNEURO": { - "openaire_id": "re3data_____::r3d100010924", - "datacite_name": "OpenNeuro", - "official_name": "OpenNeuro" - }, - "UNAVCO.UNAVCO": { - "openaire_id": "re3data_____::r3d100010872", - "datacite_name": "UNAVCO", - "official_name": "UNAVCO" - }, - "SUL.SDR": { - "openaire_id": "re3data_____::r3d100010710", - "datacite_name": "Stanford Digital Repository", - "official_name": "Stanford Digital Repository" - }, - "DK.ICES": { - "openaire_id": "re3data_____::r3d100011288", - "datacite_name": "International Council for the Exploration of the Sea (ICES)", - "official_name": "International Council for the Exploration of the Sea datasets", - "similarity": 0.8833333333333333 - }, - "CISTI.DFOSCIMR": { - "openaire_id": "re3data_____::r3d100012039", - "datacite_name": "Bedford Institute of Oceanography - Fisheries and Oceans Canada - Ocean Data and Information Section", - "official_name": "Bedford Institute of Oceanography - Oceanographic Databases" - }, - "CSIC.DIGITAL": { - "openaire_id": "re3data_____::r3d100011076", - "datacite_name": "Digital CSIC", - "official_name": "DIGITAL.CSIC" - }, - "TIB.PANGAEA": 
{ - "openaire_id": "re3data_____::r3d100010134", - "datacite_name": "PANGAEA", - "official_name": "PANGAEA" - }, - "PSU.DATACOM": { - "openaire_id": "re3data_____::r3d100010477", - "datacite_name": "Data Commons", - "official_name": "ANU Data Commons", - "similarity": 0.8571428571428571 - }, - "ANDS.CENTRE72": { - "openaire_id": "re3data_____::r3d100010451", - "datacite_name": "PARADISEC", - "official_name": "Pacific and Regional Archive for Digital Sources in Endangered Cultures" - }, - "BL.OXDB": { - "openaire_id": "re3data_____::r3d100011653", - "datacite_name": "Oxford University Library Service Databank", - "official_name": "DataBank, Bodleian Libraries, University of Oxford" - }, - "BL.STANDREW": { - "openaire_id": "re3data_____::r3d100012411", - "datacite_name": "University of St Andrews", - "official_name": "St Andrews Research portal - Research Data" - }, - "TIB.BAFG": { - "openaire_id": "re3data_____::r3d100011664", - "datacite_name": "Bundesanstalt f\u00fcr Gew\u00e4sserkunde", - "official_name": "Geoportal der BFG" - }, - "CRUI.UNIBO": { - "openaire_id": "re3data_____::r3d100012604", - "datacite_name": "Universit\u00e0 degli Studi di Bologna", - "official_name": "AMS Acta" - }, - "GDCC.ODUM-LIBRARY": { - "openaire_id": "re3data_____::r3d100000005", - "datacite_name": "UNC Libraries", - "official_name": "UNC Dataverse" - }, - "RG.RG": { - "openaire_id": "re3data_____::r3d100012227", - "datacite_name": "ResearchGate", - "official_name": "ResearchGate" - }, - "TIB.EUMETSAT": { - "openaire_id": "re3data_____::r3d100010232", - "datacite_name": "EUMETSAT", - "official_name": "Eumetsat" - }, - "SND.SMHI": { - "openaire_id": "re3data_____::r3d100011776", - "datacite_name": "Swedish Meteorological and Hydrological Institute open data", - "official_name": "Swedish Meteorological and Hydrological Institute open data" - }, - "NOAA.NCEI": { - "openaire_id": "re3data_____::r3d100011801", - "datacite_name": "National Oceanic and Atmospheric Administration (NOAA) 
National Centers for Environmental Information (NCEI)", - "official_name": "NCEI" - }, - "TIB.WDCC": { - "openaire_id": "re3data_____::r3d100010299", - "datacite_name": "World Data Center for Climate", - "official_name": "World Data Center for Climate" - }, - "CNGB.GIGADB": { - "openaire_id": "re3data_____::r3d100010478", - "datacite_name": "GigaDB", - "official_name": "GigaDB" - }, - "DELFT.VLIZ": { - "openaire_id": "re3data_____::r3d100010661", - "datacite_name": "Vlaams Instituut voor de Zee", - "official_name": "Flanders Marine Institute" - }, - "NUS.SB": { - "openaire_id": "re3data_____::r3d100012564", - "datacite_name": "National University of Singapore", - "official_name": "ScholarBank@NUS" - }, - "EDI.EDI": { - "openaire_id": "re3data_____::r3d100010272", - "datacite_name": "Environmental Data Initiative", - "official_name": "Environmental Data Initiative Repository" - }, - "INIST.ADISP": { - "openaire_id": "re3data_____::r3d100010494", - "datacite_name": "Quetelet PROGEDO Diffusion", - "official_name": "Quetelet PROGEDO Diffusion" - }, - "GESIS.SHARE": { - "openaire_id": "re3data_____::r3d100010430", - "datacite_name": "SHARE - ERIC", - "official_name": "Survey of Health, Ageing and Retirement in Europe" - }, - "ANDS.CENTRE-1": { - "openaire_id": "re3data_____::r3d100010864", - "datacite_name": "Griffith University", - "official_name": "Griffith University Research Data Repository" - }, - "BL.READING": { - "openaire_id": "re3data_____::r3d100012064", - "datacite_name": "University of Reading", - "official_name": "University of Reading Research Data Archive" - }, - "CORNELL.CISER": { - "openaire_id": "re3data_____::r3d100011056", - "datacite_name": "CISER Data Archive", - "official_name": "CISER Data Archive" - }, - "DRYAD.DRYAD": { - "openaire_id": "re3data_____::r3d100000044", - "datacite_name": "DRYAD", - "official_name": "DRYAD" - }, - "CDL.PISCO": { - "openaire_id": "re3data_____::r3d100010947", - "datacite_name": "Partnership for Interdisciplinary 
Studies of Coastal Oceans (PISCO)", - "official_name": "Partnership for Interdisciplinary Studies of Coastal Oceans" - }, - "IEEE.DATAPORT": { - "openaire_id": "re3data_____::r3d100012569", - "datacite_name": "IEEE DataPort", - "official_name": "IEEE DataPort" - }, - "DELFT.MAASTRO": { - "openaire_id": "re3data_____::r3d100011086", - "datacite_name": "MAASTRO Clinic", - "official_name": "CancerData.org" - }, - "USGS.PROD": { - "openaire_id": "re3data_____::r3d100010054", - "datacite_name": "USGS DOI Tool Production Environment", - "official_name": "U.S. Geological Survey" - }, - "GDCC.ODUM-DV": { - "openaire_id": "re3data_____::r3d100000005", - "datacite_name": "Odum Institute Dataverse", - "official_name": "UNC Dataverse" - }, - "CDL.SDSCSG": { - "openaire_id": "re3data_____::r3d100011690", - "datacite_name": "UCSD Signaling Gateway", - "official_name": "UCSD Signaling gateway" - }, - "ORBIS.NKN": { - "openaire_id": "re3data_____::r3d100011587", - "datacite_name": "Northwest Knowledge Network", - "official_name": "Northwest Knowledge Network" - }, - "ANDS.CENTRE63": { - "openaire_id": "re3data_____::r3d100010918", - "datacite_name": "Test: Atlas of Living Australia", - "official_name": "Atlas of Living Australia", - "similarity": 0.8928571428571429 - }, - "SML.TALKBANK": { - "openaire_id": "re3data_____::r3d100010887", - "datacite_name": "TalkBank", - "official_name": "TalkBank" - }, - "CORNELL.LIBRARY": { - "openaire_id": "re3data_____::r3d100012322", - "datacite_name": "Cornell University Library", - "official_name": "eCommons - Cornell's digital repository" - }, - "BL.SOTON": { - "openaire_id": "re3data_____::r3d100011245", - "datacite_name": "University of Southampton", - "official_name": "University of Southampton Institutional Research Repository" - }, - "GESIS.DB-BANK": { - "openaire_id": "re3data_____::r3d100012252", - "datacite_name": "Forschungsdaten- und Servicezentrum der Bundesbank", - "official_name": "Forschungsdaten- und Servicezentrum der 
Bundesbank" - }, - "ANDS.CENTRE68": { - "openaire_id": "re3data_____::r3d100010918", - "datacite_name": "Atlas of Living Australia", - "official_name": "Atlas of Living Australia" - }, - "ANDS.CENTRE69": { - "openaire_id": "re3data_____::r3d100010914", - "datacite_name": "Australian Ocean Data Network", - "official_name": "Australian Ocean Data Network Portal" - }, - "INIST.CDS": { - "openaire_id": "re3data_____::r3d100010584", - "datacite_name": "Strasbourg Astronomical Data Center", - "official_name": "Strasbourg Astronomical Data Center" - }, - "BL.NHM": { - "openaire_id": "re3data_____::r3d100011675", - "datacite_name": "Natural History Museum, London", - "official_name": "Natural History Museum, Data Portal" - }, - "BL.ADS": { - "openaire_id": "re3data_____::r3d100000006", - "datacite_name": "Archaeology Data Service", - "official_name": "Archaeology Data Service" - }, - "GDCC.JHU": { - "openaire_id": "re3data_____::r3d100011836", - "datacite_name": "Johns Hopkins University Library", - "official_name": "Johns Hopkins Data Archive Dataverse Network" - }, - "BL.ED": { - "openaire_id": "re3data_____::r3d100000047", - "datacite_name": "University of Edinburgh", - "official_name": "Edinburgh DataShare" - }, - "BL.EXETER": { - "openaire_id": "re3data_____::r3d100011202", - "datacite_name": "University of Exeter", - "official_name": "Open Research Exeter" - }, - "BL.NCL": { - "openaire_id": "re3data_____::r3d100012408", - "datacite_name": "Newcastle University", - "official_name": "NCL Data" - }, - "BROWN.BDR": { - "openaire_id": "re3data_____::r3d100011654", - "datacite_name": "Brown Digital Repository", - "official_name": "Brown Digital Repository" - }, - "GDCC.SYR-QDR": { - "openaire_id": "re3data_____::r3d100011038", - "datacite_name": "Syracuse University Qualitative Data Repository", - "official_name": "Qualitative Data Repository" - }, - "BL.BRISTOL": { - "openaire_id": "re3data_____::r3d100011099", - "datacite_name": "University of Bristol", - 
"official_name": "data.bris Research Data Repository" - }, - "DATACITE.DATACITE": { - "openaire_id": "openaire____::datacite", - "datacite_name": "DataCite", - "official_name": "Datacite" - }, - "ESTDOI.KEEL": { - "openaire_id": "re3data_____::r3d100011941", - "datacite_name": "Keeleressursid. The Center of Estonian Language Resources", - "official_name": "Center of Estonian Language Resources" - }, - "BL.ESSEX": { - "openaire_id": "re3data_____::r3d100012405", - "datacite_name": "University of Essex", - "official_name": "Research Data at Essex" - }, - "PURDUE.MDF": { - "openaire_id": "re3data_____::r3d100012080", - "datacite_name": "Univ Chicago Materials Data Facility", - "official_name": "Materials Data Facility" - }, - "DELFT.KNMI": { - "openaire_id": "re3data_____::r3d100011879", - "datacite_name": "KNMI Data Centre", - "official_name": "KNMI Data Centre" - }, - "CUL.CIESIN": { - "openaire_id": "re3data_____::r3d100010207", - "datacite_name": "Center for International Earth Science Information Network", - "official_name": "Center for International Earth Science Information Network" - }, - "WISC.NEOTOMA": { - "openaire_id": "re3data_____::r3d100011761", - "datacite_name": "Neotoma Paleoecological Database", - "official_name": "Neotoma Paleoecology Database", - "similarity": 0.9180327868852459 - }, - "IRIS.IRIS": { - "openaire_id": "re3data_____::r3d100010268", - "datacite_name": "Incorporated Research Institutions for Seismology", - "official_name": "Incorporated Research Institutions for Seismology" - }, - "ANDS.CENTRE50": { - "openaire_id": "re3data_____::r3d100012378", - "datacite_name": "Analysis and Policy Observatory", - "official_name": "Analysis and Policy Observatory" - }, - "FAO.RING": { - "openaire_id": "re3data_____::r3d100012571", - "datacite_name": "CIARD RING", - "official_name": "CIARD Ring" - }, - "CUL.R2R": { - "openaire_id": "re3data_____::r3d100010735", - "datacite_name": "Rolling Deck to Repository", - "official_name": "Rolling Deck to 
Repository" - }, - "DEMO.GRIIDC": { - "openaire_id": "re3data_____::r3d100011571", - "datacite_name": "Gulf of Mexico Research Initiative Information and Data Cooperative", - "official_name": "Gulf of Mexico Research Initiative Information and Data Cooperative" - }, - "ANDS.CENTRE-6": { - "openaire_id": "re3data_____::r3d100012268", - "datacite_name": "Curtin University", - "official_name": "Curtin University Research Data Collection" - }, - "ANDS.CENTRE-5": { - "openaire_id": "re3data_____::r3d100012013", - "datacite_name": "TERN Central Portal", - "official_name": "TERN Data Discovery portal" - }, - "FIGSHARE.UCT": { - "openaire_id": "re3data_____::r3d100012633", - "datacite_name": "University of Cape Town (UCT)", - "official_name": "ZivaHub" - }, - "BIBSYS.UIT-ORD": { - "openaire_id": "re3data_____::r3d100012538", - "datacite_name": "DataverseNO", - "official_name": "DataverseNO" - }, - "CISTI.CADC": { - "openaire_id": "re3data_____::r3d100000016", - "datacite_name": "Canadian Astronomy Data Centre", - "official_name": "The Canadian Astronomy Data Centre", - "similarity": 0.9375 - }, - "BL.CCDC": { - "openaire_id": "re3data_____::r3d100010197", - "datacite_name": "The Cambridge Crystallographic Data Centre", - "official_name": "The Cambridge Structural Database" - }, - "BL.UCLD": { - "openaire_id": "re3data_____::r3d100012417", - "datacite_name": "University College London", - "official_name": "UCL Discovery" - }, - "GESIS.RKI": { - "openaire_id": "re3data_____::r3d100010436", - "datacite_name": "'Health Monitoring' Research Data Centre at the Robert Koch Institute", - "official_name": "'Health Monitoring' Research Data Centre at the Robert Koch Institute" - }, - "BL.DRI": { - "openaire_id": "re3data_____::r3d100011805", - "datacite_name": "Digital Repository of Ireland", - "official_name": "Digital Repository of Ireland" - }, - "TIB.KIT-IMK": { - "openaire_id": "re3data_____::r3d100011956", - "datacite_name": "Institute for Meteorology and Climate Research - 
Atmospheric Trace Gases and Remote Sensing", - "official_name": "CARIBIC" - }, - "DOINZ.LANDCARE": { - "openaire_id": "re3data_____::r3d100011662", - "datacite_name": "Landcare Research New Zealand Ltd", - "official_name": "Landcare Research Data Repository" - }, - "DEMO.EMORY": { - "openaire_id": "re3data_____::r3d100011559", - "datacite_name": "The Cancer Imaging Archive", - "official_name": "The Cancer Imaging Archive" - }, - "UMN.DRUM": { - "openaire_id": "re3data_____::r3d100011393", - "datacite_name": "Data Repository for the University of Minnesota", - "official_name": "Data Repository for the University of Minnesota" - }, - "CISTI.SFU": { - "openaire_id": "re3data_____::r3d100012512", - "datacite_name": "Simon Fraser University", - "official_name": "SFU Radar" - }, - "GESIS.ICPSR": { - "openaire_id": "re3data_____::r3d100010255", - "datacite_name": "ICPSR", - "official_name": "Inter-university Consortium for Political and Social Research" - }, - "ANDS.CENTRE49": { - "openaire_id": "re3data_____::r3d100012145", - "datacite_name": "The University of Melbourne", - "official_name": "melbourne.figshare.com" - }, - "ZBW.IFO": { - "openaire_id": "re3data_____::r3d100010201", - "datacite_name": "LMU-ifo Economics & Business Data Center", - "official_name": "LMU-ifo Economics & Business Data Center" - }, - "TIB.BEILST": { - "openaire_id": "re3data_____::r3d100012329", - "datacite_name": "Beilstein-Institut zur F\u00f6rderung der Chemischen Wissenschaften", - "official_name": "STRENDA DB" - }, - "ZBW.ZBW-JDA": { - "openaire_id": "re3data_____::r3d100012190", - "datacite_name": "ZBW Journal Data Archive", - "official_name": "ZBW Journal Data Archive" - }, - "BL.UKDA": { - "openaire_id": "re3data_____::r3d100010215", - "datacite_name": "UK Data Archive", - "official_name": "UK Data Archive" - }, - "CERN.INSPIRE": { - "openaire_id": "re3data_____::r3d100011077", - "datacite_name": "inspirehep.net", - "official_name": "Inspire-HEP" - }, - "CISTI.OTNDC": { - 
"openaire_id": "re3data_____::r3d100012083", - "datacite_name": "Ocean Tracking Network", - "official_name": "Ocean Tracking Network" - }, - "CISTI.CC": { - "openaire_id": "re3data_____::r3d100012646", - "datacite_name": "Compute Canada", - "official_name": "Federated Research Data Repository" - }, - "SND.ICOS": { - "openaire_id": "re3data_____::r3d100012203", - "datacite_name": "ICOS Carbon Portal", - "official_name": "ICOS Carbon Portal" - }, - "BL.MENDELEY": { - "openaire_id": "re3data_____::r3d100011868", - "datacite_name": "Mendeley", - "official_name": "Mendeley Data" - }, - "DELFT.UU": { - "openaire_id": "re3data_____::r3d100011201", - "datacite_name": "Universiteit Utrecht", - "official_name": "DataverseNL" - }, - "GESIS.DSZ-BO": { - "openaire_id": "re3data_____::r3d100010439", - "datacite_name": "Data Service Center for Business and Organizational Data", - "official_name": "Data Service Center for Business and Organizational Data" - }, - "TIB.IPK": { - "openaire_id": "re3data_____::r3d100011647", - "datacite_name": "IPK Gatersleben", - "official_name": "IPK Gatersleben" - }, - "GDCC.HARVARD-DV": { - "openaire_id": "re3data_____::r3d100010051", - "datacite_name": "Harvard IQSS Dataverse", - "official_name": "Harvard Dataverse" - }, - "BL.LEEDS": { - "openaire_id": "re3data_____::r3d100011945", - "datacite_name": "University of Leeds", - "official_name": "Research Data Leeds Repository" - }, - "BL.BRUNEL": { - "openaire_id": "re3data_____::r3d100012140", - "datacite_name": "Brunel University London", - "official_name": "Brunel figshare" - }, - "DEMO.ENVIDAT": { - "openaire_id": "re3data_____::r3d100012587", - "datacite_name": "EnviDat", - "official_name": "EnviDat" - }, - "GDCC.NTU": { - "openaire_id": "re3data_____::r3d100012440", - "datacite_name": "Nanyang Technological University", - "official_name": "DR-NTU (Data)" - }, - "UNM.DATAONE": { - "openaire_id": "re3data_____::r3d100000045", - "datacite_name": "DataONE", - "official_name": "DataONE" - }, - 
"CSC.NRD": { - "openaire_id": "re3data_____::r3d100012157", - "datacite_name": "Ministry of Culture and Education", - "official_name": "IDA Research Data Storage Service" - }, - "GESIS.DIPF": { - "openaire_id": "re3data_____::r3d100010390", - "datacite_name": "Research Data Centre for Education", - "official_name": "Research Data Centre for Education" - }, - "BL.HALLAM": { - "openaire_id": "re3data_____::r3d100011909", - "datacite_name": "Sheffield Hallam University", - "official_name": "Sheffield Hallam University Research Data Archive" - }, - "BL.LSHTM": { - "openaire_id": "re3data_____::r3d100011800", - "datacite_name": "London School of Hygiene and Tropical Medicine", - "official_name": "LSHTM Data Compass" - }, - "SUBGOE.DARIAH": { - "openaire_id": "re3data_____::r3d100011345", - "datacite_name": "Digital Research Infrastructure for the Arts and Humanities", - "official_name": "DARIAH-DE Repository" - }, - "SND.SU": { - "openaire_id": "re3data_____::r3d100012147", - "datacite_name": "Stockholm University", - "official_name": "Stockholm University repository for data" - }, - "GESIS.INDEPTH": { - "openaire_id": "re3data_____::r3d100011392", - "datacite_name": "INDEPTH Network", - "official_name": "INDEPTH Data Repository" - }, - "TIB.FLOSS": { - "openaire_id": "re3data_____::r3d100010863", - "datacite_name": "FLOSS Project, Syracuse University", - "official_name": "FLOSSmole" - }, - "ETHZ.WGMS": { - "openaire_id": "re3data_____::r3d100010627", - "datacite_name": "World Glacier Monitoring Service", - "official_name": "World Glacier Monitoring Service" - }, - "BL.UEL": { - "openaire_id": "re3data_____::r3d100012414", - "datacite_name": "University of East London", - "official_name": "Data.uel" - }, - "DELFT.DATA4TU": { - "openaire_id": "re3data_____::r3d100010216", - "datacite_name": "4TU.Centre for Research Data", - "official_name": "4TU.Centre for Research Data" - }, - "GESIS.IANUS": { - "openaire_id": "re3data_____::r3d100012361", - "datacite_name": "IANUS - 
FDZ Arch\u00e4ologie & Altertumswissenschaften", - "official_name": "IANUS Datenportal" - }, - "CDL.UCSDCCA": { - "openaire_id": "re3data_____::r3d100011655", - "datacite_name": "California Coastal Atlas", - "official_name": "California Coastal Atlas" - }, - "VIVA.VT": { - "openaire_id": "re3data_____::r3d100012601", - "datacite_name": "Virginia Tech", - "official_name": "VTechData" - }, - "ANDS.CENTRE39": { - "openaire_id": "re3data_____::r3d100011640", - "datacite_name": "University of the Sunshine Coast", - "official_name": "USC Research Bank research data" - }, - "DEMO.OPENKIM": { - "openaire_id": "re3data_____::r3d100011864", - "datacite_name": "OpenKIM", - "official_name": "OpenKIM" - }, - "INIST.OTELO": { - "openaire_id": "re3data_____::r3d100012505", - "datacite_name": "Observatoire Terre Environnement de Lorraine", - "official_name": "ORDaR" - }, - "INIST.ILL": { - "openaire_id": "re3data_____::r3d100012072", - "datacite_name": "Institut Laue-Langevin", - "official_name": "ILL Data Portal" - }, - "ANDS.CENTRE31": { - "openaire_id": "re3data_____::r3d100012378", - "datacite_name": "Test: Analysis and Policy Observatory", - "official_name": "Analysis and Policy Observatory", - "similarity": 0.9117647058823529 - }, - "ANDS.CENTRE30": { - "openaire_id": "re3data_____::r3d100010917", - "datacite_name": "Test: Geoscience Australia", - "official_name": "Geoscience Australia", - "similarity": 0.8695652173913043 - }, - "BL.SALFORD": { - "openaire_id": "re3data_____::r3d100012144", - "datacite_name": "University of Salford", - "official_name": "University of Salford Data Repository" - }, - "CERN.HEPDATA": { - "openaire_id": "re3data_____::r3d100010081", - "datacite_name": "HEPData.net", - "official_name": "HEPData" - }, - "ETHZ.E-COLL": { - "openaire_id": "re3data_____::r3d100012557", - "datacite_name": "ETH Z\u00fcrich Research Collection", - "official_name": "ETH Z\u00fcrich Research Collection" - }, - "GBIF.GBIF": { - "openaire_id": "re3data_____::r3d100000039", 
- "datacite_name": "Global Biodiversity Information Facility", - "official_name": "Global Biodiversity Information Facility" - }, - "ORNLDAAC.DAAC": { - "openaire_id": "re3data_____::r3d100000037", - "datacite_name": "Oak Ridge National Laboratory Distributed Active Archive Center", - "official_name": "Oak Ridge National Laboratory Distributed Active Archive Center for Biogeochemical Dynamics" - }, - "KAUST.KAUSTREPO": { - "openaire_id": "re3data_____::r3d100011898", - "datacite_name": "KAUST Research Repository", - "official_name": "UWA Research Repository", - "similarity": 0.875 - }, - "ZBW.ZEW": { - "openaire_id": "re3data_____::r3d100010399", - "datacite_name": "Zentrum f\u00fcr Europ\u00e4ische Wirtschaftsforschung GmbH (ZEW)", - "official_name": "ZEW Forschungsdatenzentrum" - }, - "SML.TDAR": { - "openaire_id": "re3data_____::r3d100010347", - "datacite_name": "Digital Antiquity (TDAR)", - "official_name": "tDAR" - }, - "GESIS.CSDA": { - "openaire_id": "re3data_____::r3d100010484", - "datacite_name": "Czech Social Science Data Archive", - "official_name": "Czech Social Science Data Archive" - }, - "SND.BOLIN": { - "openaire_id": "re3data_____::r3d100011699", - "datacite_name": "Bolin Centre Database", - "official_name": "Bolin Centre Database" - }, - "MLA.HC": { - "openaire_id": "re3data_____::r3d100012309", - "datacite_name": "Humanities Commons", - "official_name": "Humanities Commons" - }, - "CDL.IDASHREP": { - "openaire_id": "re3data_____::r3d100010382", - "datacite_name": "iDASH Repository", - "official_name": "IDS Repository", - "similarity": 0.8666666666666667 - }, - "ZBMED.SNSB": { - "openaire_id": "re3data_____::r3d100011873", - "datacite_name": "Staatliche Naturwissenschaftliche Sammlungen Bayerns", - "official_name": "Staatliche Naturwissenschaftliche Sammlungen Bayerns - datasets", - "similarity": 0.9043478260869565 - }, - "ORBIS.OHSU": { - "openaire_id": "re3data_____::r3d100012244", - "datacite_name": "Oregon Health Sciences University", - 
"official_name": "OHSU Digital Commons" - }, - "DARTLIB.CRAWDAD": { - "openaire_id": "re3data_____::r3d100010716", - "datacite_name": "CRAWDAD", - "official_name": "CRAWDAD" - }, - "CDL.CCHDO": { - "openaire_id": "re3data_____::r3d100010831", - "datacite_name": "CLIVAR and Carbon Hydrographic Data Office", - "official_name": "Climate Variability and Predictability and Carbon Hydrographic Data Office" - }, - "GESIS.AUSSDA": { - "openaire_id": "re3data_____::r3d100010483", - "datacite_name": "Austrian Social Science Data Archive", - "official_name": "AUSSDA" - }, - "NSIDC.DATACTR": { - "openaire_id": "re3data_____::r3d100010110", - "datacite_name": "National Snow and Ice Data Center", - "official_name": "National Snow and Ice Data Center" - }, - "TIB.RADAR": { - "openaire_id": "re3data_____::r3d100012330", - "datacite_name": "FIZ Karlsruhe \u2013 Leibniz-Institut f\u00fcr Informationsinfrastruktur", - "official_name": "RADAR" - }, - "KIM.OPENKIM": { - "openaire_id": "re3data_____::r3d100011864", - "datacite_name": "Open Knowledgebase of Interatomic Models (OpenKIM)", - "official_name": "OpenKIM" - }, - "BL.LBORO": { - "openaire_id": "re3data_____::r3d100012143", - "datacite_name": "Loughborough University", - "official_name": "Loughborough Data Repository" - }, - "GESIS.ZPID": { - "openaire_id": "re3data_____::r3d100010328", - "datacite_name": "GESIS.ZPID", - "official_name": "PsychData" - }, - "SML.TCIA": { - "openaire_id": "re3data_____::r3d100011559", - "datacite_name": "The Cancer Imaging Archive", - "official_name": "The Cancer Imaging Archive" - }, - "CDL.IRIS": { - "openaire_id": "re3data_____::r3d100010268", - "datacite_name": "Incorporated Research Institutions for Seismology", - "official_name": "Incorporated Research Institutions for Seismology" - }, - "BIBSYS.NMDC": { - "openaire_id": "re3data_____::r3d100012291", - "datacite_name": "Norwegian Marine Data Centre", - "official_name": "Norwegian Polar Data Centre", - "similarity": 0.8727272727272727 - }, - 
"ANDS.CENTRE25": { - "openaire_id": "re3data_____::r3d100010917", - "datacite_name": "Geoscience Australia", - "official_name": "Geoscience Australia" - }, - "BL.UCLAN": { - "openaire_id": "re3data_____::r3d100012019", - "datacite_name": "University of Central Lancashire", - "official_name": "UCLanData" - }, - "ANDS.CENTRE23": { - "openaire_id": "re3data_____::r3d100011898", - "datacite_name": "The University of Western Australia", - "official_name": "UWA Research Repository" - }, - "CISTI.WOUDC": { - "openaire_id": "re3data_____::r3d100010367", - "datacite_name": "World Ozone and Ultraviolet Radiation Data Centre", - "official_name": "World Ozone and Ultraviolet Radiation Data Centre" - }, - "FIGSHARE.ARS": { - "openaire_id": "re3data_____::r3d100010066", - "datacite_name": "figshare Academic Research System", - "official_name": "figshare" - }, - "ILLINOIS.DATABANK": { - "openaire_id": "re3data_____::r3d100012001", - "datacite_name": "Illinois Data Bank", - "official_name": "Illinois Data Bank" - }, - "BL.ECMWF": { - "openaire_id": "re3data_____::r3d100011726", - "datacite_name": "European Centre for Medium-Range Weather Forecasts", - "official_name": "European Centre for Medium-Range Weather Forecasts" - }, - "CDL.ISSDA": { - "openaire_id": "re3data_____::r3d100010497", - "datacite_name": "Irish Social Science Data Archive (ISSDA)", - "official_name": "Irish Social Science Data Archive" - }, - "CDL.PQR": { - "openaire_id": "re3data_____::r3d100012225", - "datacite_name": "Pitt Quantum Repository", - "official_name": "Pitt Quantum Repository" - }, - "ANDS.CENTRE82": { - "openaire_id": "re3data_____::r3d100010138", - "datacite_name": "Test: Australian Data Archive", - "official_name": "Australian Data Archive", - "similarity": 0.8846153846153846 - }, - "GDCC.HARVARD-SLP": { - "openaire_id": "re3data_____::r3d100011861", - "datacite_name": "National Sleep Research Resource", - "official_name": "National Sleep Research Resource" - }, - "CDL.IMMPORT": { - 
"openaire_id": "re3data_____::r3d100012529", - "datacite_name": "UCSF ImmPort", - "official_name": "ImmPort" - }, - "GESIS.FID": { - "openaire_id": "re3data_____::r3d100012347", - "datacite_name": "FID f\u00fcr internationale und interdisziplin\u00e4re Rechtsforschung", - "official_name": "\u00b2Dok[\u00a7]" - }, - "OCEAN.OCEAN": { - "openaire_id": "re3data_____::r3d100012369", - "datacite_name": "Code Ocean", - "official_name": "Code Ocean" - }, - "CERN.ZENODO": { - "openaire_id": "re3data_____::r3d100010468", - "datacite_name": "Zenodo", - "official_name": "Zenodo" - }, - "ETHZ.DA-RD": { - "openaire_id": "re3data_____::r3d100011626", - "datacite_name": "ETHZ Data Archive - Research Data", - "official_name": "ETH Data Archive" - }, - "SND.ECDS": { - "openaire_id": "re3data_____::r3d100011000", - "datacite_name": "Environment Climate Data Sweden", - "official_name": "Environment Climate Data Sweden" - }, - "BL.BATH": { - "openaire_id": "re3data_____::r3d100011947", - "datacite_name": "University of Bath", - "official_name": "University of Bath Research Data Archive" - }, - "TIB.LDEO": { - "openaire_id": "re3data_____::r3d100012547", - "datacite_name": "LDEO - Lamont-Doherty Earth Observatory, Columbia University", - "official_name": "Lamont-Doherty Core Repository" - }, - "COS.OSF": { - "openaire_id": "re3data_____::r3d100011137", - "datacite_name": "Open Science Framework", - "official_name": "Open Science Framework" - }, - "ESTDOI.REPO": { - "openaire_id": "re3data_____::r3d100012333", - "datacite_name": "DataDOI", - "official_name": "DataDOI" - }, - "CDL.NSFADC": { - "openaire_id": "re3data_____::r3d100011973", - "datacite_name": "NSF Arctic Data Center", - "official_name": "NSF Arctic Data Center" - }, - "ANDS.CENTRE13": { - "openaire_id": "re3data_____::r3d100010477", - "datacite_name": "The Australian National University", - "official_name": "ANU Data Commons" - }, - "BL.NERC": { - "openaire_id": "re3data_____::r3d100010199", - "datacite_name": "Natural 
Environment Research Council", - "official_name": "Environmental Information Data Centre" - }, - "SAGEBIO.SYNAPSE": { - "openaire_id": "re3data_____::r3d100011894", - "datacite_name": "Synapse", - "official_name": "Synapse" - }, - "ANDS.CENTRE15": { - "openaire_id": "re3data_____::r3d100000038", - "datacite_name": "Australian Antarctic Division", - "official_name": "Australian Antarctic Data Centre" - }, - "WISC.BMRB": { - "openaire_id": "re3data_____::r3d100010191", - "datacite_name": "Biological Magnetic Resonance Bank", - "official_name": "Biological Magnetic Resonance Data Bank", - "similarity": 0.9315068493150684 - }, - "STSCI.MAST": { - "openaire_id": "re3data_____::r3d100010403", - "datacite_name": "Barbara A. Mikulski Archive for Space Telescopes", - "official_name": "Barbara A. Mikulski Archive for Space Telescopes" - }, - "CDL.NSIDC": { - "openaire_id": "re3data_____::r3d100010110", - "datacite_name": "National Snow and Ice Data Center", - "official_name": "National Snow and Ice Data Center" - }, - "BL.STRATH": { - "openaire_id": "re3data_____::r3d100012412", - "datacite_name": "University of Strathclyde", - "official_name": "University of Strathclyde KnowledgeBase Datasets" - }, - "DEMO.TDAR": { - "openaire_id": "re3data_____::r3d100010347", - "datacite_name": "The Digital Archaeological Record (tDAR)", - "official_name": "tDAR" - }, - "TIND.CALTECH": { - "openaire_id": "re3data_____::r3d100012384", - "datacite_name": "CaltechDATA", - "official_name": "CaltechDATA" - }, - "GESIS.BIBB-FDZ": { - "openaire_id": "re3data_____::r3d100010190", - "datacite_name": "Forschungsdatenzentrum im Bundesinstitut f\u00fcr Berufsbildung", - "official_name": "Forschungsdatenzentrum im Bundesinstitut f\u00fcr Berufsbildung" - }, - "ANDS.CENTRE87": { - "openaire_id": "re3data_____::r3d100010138", - "datacite_name": "Australian Data Archive", - "official_name": "Australian Data Archive" - }, - "GESIS.NEPS": { - "openaire_id": "re3data_____::r3d100010736", - "datacite_name": 
"Nationales Bildungspanel (National Educational Panel Study, NEPS)", - "official_name": "Nationales Bildungspanel" - }, - "CDL.UCBCRCNS": { - "openaire_id": "re3data_____::r3d100011269", - "datacite_name": "Collaborative Research in Computational Neuroscience (CRCNS)", - "official_name": "Collaborative Research in Computational Neuroscience" - }, - "TIB.UKON": { - "openaire_id": "re3data_____::r3d100010469", - "datacite_name": "Movebank", - "official_name": "Movebank" - }, - "UMN.IPUMS": { - "openaire_id": "re3data_____::r3d100010794", - "datacite_name": "Minnesota Population Center", - "official_name": "Minnesota Population Center" - }, - "TIB.BIKF": { - "openaire_id": "re3data_____::r3d100012379", - "datacite_name": "Senckenberg Data & Metadata Repository", - "official_name": "Senckenberg Data & Metadata Repository" - }, - "TDL.GRIIDC": { - "openaire_id": "re3data_____::r3d100011571", - "datacite_name": "Gulf of Mexico Research Initiative Information and Data Cooperative", - "official_name": "Gulf of Mexico Research Initiative Information and Data Cooperative" - }, - "DELFT.NIBG": { - "openaire_id": "re3data_____::r3d100012167", - "datacite_name": "Sound and Vision", - "official_name": "Sound and Vision" - }, - "BL.SURREY": { - "openaire_id": "re3data_____::r3d100012232", - "datacite_name": "University of Surrey", - "official_name": "Surrey Research Insight" - }, - "OSTI.ORNLNGEE": { - "openaire_id": "re3data_____::r3d100011676", - "datacite_name": "NGEE-Arctic (Next Generation Ecosystems Experiement)", - "official_name": "NGEE Arctic" - }, - "TIB.WDCRSAT": { - "openaire_id": "re3data_____::r3d100010156", - "datacite_name": "World Data Center for Remote Sensing of the Atmosphere", - "official_name": "The World Data Center for Remote Sensing of the Atmosphere", - "similarity": 0.9642857142857143 - }, - "ZBMED.DSMZ": { - "openaire_id": "re3data_____::r3d100010219", - "datacite_name": "DSMZ", - "official_name": "DSMZ" - }, - "DOINZ.NZAU": { - "openaire_id": 
"re3data_____::r3d100012110", - "datacite_name": "University of Auckland Data Publishing and Discovery Service", - "official_name": "University of Auckland Data Repository" - }, - "INIST.RESIF": { - "openaire_id": "re3data_____::r3d100012222", - "datacite_name": "R\u00e9seau sismologique et g\u00e9od\u00e9sique fran\u00e7ais", - "official_name": "RESIF Seismic Data Portal" - }, - "CDL.NCEAS": { - "openaire_id": "re3data_____::r3d100010093", - "datacite_name": "National Center for Ecological Analysis and Synthesis (NCEAS)", - "official_name": "National Center for Ecological Analysis and Synthesis Data Repository" - }, - "ZBMED.EMP": { - "openaire_id": "re3data_____::r3d100010234", - "datacite_name": "eyeMoviePedia", - "official_name": "eyeMoviePedia" - }, - "ZBMED.BIOFRESH": { - "openaire_id": "re3data_____::r3d100011651", - "datacite_name": "Project BioFresh, Leibniz-Institute of Freshwater Ecology and Inland Fisheries", - "official_name": "Freshwater Biodiversity Data Portal" - }, - "INIST.IFREMER": { - "openaire_id": "re3data_____::r3d100011867", - "datacite_name": "Institut Fran\u00e7ais de Recherche pour l'Exploitation de la Mer", - "official_name": "SEANOE" - }, - "ETHZ.SICAS": { - "openaire_id": "re3data_____::r3d100011560", - "datacite_name": "SICAS", - "official_name": "Sicas Medical Image Repository" - }, - "SND.SND": { - "openaire_id": "re3data_____::r3d100010146", - "datacite_name": "Swedish National Data Service", - "official_name": "Swedish National Data Service" - }, - "DELFT.EASY": { - "openaire_id": "re3data_____::r3d100011201", - "datacite_name": "DANS", - "official_name": "DataverseNL" - }, - "WH.WHOAS": { - "openaire_id": "re3data_____::r3d100010423", - "datacite_name": "Woods Hole Open Access Server", - "official_name": "Woods Hole Open Access Server" - }, - "DATACITE.UCSC": { - "openaire_id": "re3data_____::r3d100010243", - "datacite_name": "UCSC Genome Browser", - "official_name": "UCSC Genome Browser" - } + "GESIS.RKI": { + "openaire_id": 
"re3data_____::r3d100010436", + "datacite_name": "Forschungsdatenzentrum am Robert Koch Institut", + "official_name": "Forschungsdatenzentrum am Robert Koch Institut" + }, + "DELFT.DATA4TU": { + "openaire_id": "re3data_____::r3d100010216", + "datacite_name": "4TU.ResearchData | science.engineering.design", + "official_name": "4TU.ResearchData | science.engineering.design" + }, + "FBTK.DMXFNR": { + "openaire_id": "re3data_____::r3d100013444", + "datacite_name": "Aperta TÜBİTAK Open Archive", + "official_name": "Aperta TÜBİTAK Open Archive" + }, + "BL.CAM": { + "openaire_id": "re3data_____::r3d100010620", + "datacite_name": "Apollo", + "official_name": "Apollo" + }, + "NU.ARCH": { + "openaire_id": "re3data_____::r3d100012925", + "datacite_name": "Arch", + "official_name": "Arch" + }, + "BL.ADS": { + "openaire_id": "re3data_____::r3d100000006", + "datacite_name": "Archaeology Data Service", + "official_name": "Archaeology Data Service" + }, + "FZJ.B2SHARE": { + "openaire_id": "re3data_____::r3d100011394", + "datacite_name": "B2SHARE", + "official_name": "B2SHARE" + }, + "STSCI.MAST": { + "openaire_id": "re3data_____::r3d100010403", + "datacite_name": "Barbara A. Mikulski Archive for Space Telescopes", + "official_name": "Barbara A. 
Mikulski Archive for Space Telescopes" + }, + "CBG.DATASETS": { + "openaire_id": "re3data_____::r3d100010927", + "datacite_name": "Barcode of Life Data Systems", + "official_name": "Barcode of Life Data Systems" + }, + "TIB.BEILST": { + "openaire_id": "re3data_____::r3d100012329", + "datacite_name": "STRENDA DB", + "official_name": "STRENDA DB" + }, + "MLBS.SKUXGS": { + "openaire_id": "re3data_____::r3d100011696", + "datacite_name": "biodiversity.aq", + "official_name": "biodiversity.aq" + }, + "BL.BIRKBECK": { + "openaire_id": "re3data_____::r3d100012185", + "datacite_name": "Birkbeck Research Data", + "official_name": "Birkbeck Research Data" + }, + "SND.BOLIN": { + "openaire_id": "re3data_____::r3d100011699", + "datacite_name": "Bolin Centre Database", + "official_name": "Bolin Centre Database" + }, + "BROWN.BDR": { + "openaire_id": "re3data_____::r3d100011654", + "datacite_name": "Brown Digital Repository", + "official_name": "Brown Digital Repository" + }, + "BL.BRUNEL": { + "openaire_id": "re3data_____::r3d100012140", + "datacite_name": "Brunel figshare", + "official_name": "Brunel figshare" + }, + "TIB.BAFG": { + "openaire_id": "re3data_____::r3d100011664", + "datacite_name": "Geoportal der BFG", + "official_name": "Geoportal der BFG" + }, + "TIND.CALTECH": { + "openaire_id": "re3data_____::r3d100012384", + "datacite_name": "CaltechDATA", + "official_name": "CaltechDATA" + }, + "CUL.CIESIN": { + "openaire_id": "re3data_____::r3d100010207", + "datacite_name": "Center for International Earth Science Information Network", + "official_name": "Center for International Earth Science Information Network" + }, + "TIB.KIT-IOC": { + "openaire_id": "re3data_____::r3d100010748", + "datacite_name": "chemotion", + "official_name": "chemotion" + }, + "CORNELL.CISER": { + "openaire_id": "re3data_____::r3d100011056", + "datacite_name": "CISER Data & Reproduction Archive", + "official_name": "CISER Data & Reproduction Archive" + }, + "CLARIN.CLARIN": { + "openaire_id": 
"re3data_____::r3d100010209", + "datacite_name": "CLARIN-ERIC", + "official_name": "CLARIN-ERIC" + }, + "OCEAN.OCEAN": { + "openaire_id": "re3data_____::r3d100012369", + "datacite_name": "Code Ocean", + "official_name": "Code Ocean" + }, + "CORNELL.LIBRARY": { + "openaire_id": "re3data_____::r3d100012322", + "datacite_name": "eCommons - Cornell's digital repository", + "official_name": "eCommons - Cornell's digital repository" + }, + "BL.CRAN": { + "openaire_id": "re3data_____::r3d100012068", + "datacite_name": "Cranfield Online Research Data", + "official_name": "Cranfield Online Research Data" + }, + "DARTLIB.CRAWDAD": { + "openaire_id": "re3data_____::r3d100010716", + "datacite_name": "CRAWDAD", + "official_name": "CRAWDAD" + }, + "GESIS.CSDA": { + "openaire_id": "re3data_____::r3d100010484", + "datacite_name": "Czech Social Science Data Archive", + "official_name": "Czech Social Science Data Archive" + }, + "PSU.DATACOM": { + "openaire_id": "re3data_____::r3d100012927", + "datacite_name": "Data Commons", + "official_name": "Data Commons" + }, + "INIST.INRA": { + "openaire_id": "re3data_____::r3d100012673", + "datacite_name": "Data INRAE", + "official_name": "Data INRAE" + }, + "UMN.DRUM": { + "openaire_id": "re3data_____::r3d100011393", + "datacite_name": "Data Repository for the University of Minnesota", + "official_name": "Data Repository for the University of Minnesota" + }, + "ESTDOI.REPO": { + "openaire_id": "re3data_____::r3d100012333", + "datacite_name": "DataDOI", + "official_name": "DataDOI" + }, + "DAFI.CLIENT": { + "openaire_id": "re3data_____::r3d100010217", + "datacite_name": "DataFirst", + "official_name": "DataFirst" + }, + "UNM.DATAONE": { + "openaire_id": "re3data_____::r3d100000045", + "datacite_name": "DataONE", + "official_name": "DataONE" + }, + "FCT.UMINHO": { + "openaire_id": "re3data_____::r3d100013173", + "datacite_name": "DataRepositoriUM", + "official_name": "DataRepositoriUM" + }, + "FIGSHARE.IASTATE": { + "openaire_id": 
"re3data_____::r3d100012696", + "datacite_name": "DataShare: the Open Data Repository of Iowa State University", + "official_name": "DataShare: the Open Data Repository of Iowa State University" + }, + "PU.DATASPACE": { + "openaire_id": "re3data_____::r3d100012513", + "datacite_name": "DataSpace", + "official_name": "DataSpace" + }, + "DANS.DATAVERSENL": { + "openaire_id": "re3data_____::r3d100011201", + "datacite_name": "DataverseNL", + "official_name": "DataverseNL" + }, + "BIBSYS.UIT-ORD": { + "openaire_id": "re3data_____::r3d100012538", + "datacite_name": "DataverseNO", + "official_name": "DataverseNO" + }, + "GESIS.SSRI": { + "openaire_id": "re3data_____::r3d100013494", + "datacite_name": "DATICE", + "official_name": "DATICE" + }, + "SML.TDAR": { + "openaire_id": "re3data_____::r3d100010347", + "datacite_name": "tDAR", + "official_name": "tDAR" + }, + "CSIC.DIGITAL": { + "openaire_id": "re3data_____::r3d100011076", + "datacite_name": "DIGITAL.CSIC", + "official_name": "DIGITAL.CSIC" + }, + "BL.DRI": { + "openaire_id": "re3data_____::r3d100011805", + "datacite_name": "Digital Repository of Ireland", + "official_name": "Digital Repository of Ireland" + }, + "SUBGOE.DARIAH": { + "openaire_id": "re3data_____::r3d100011345", + "datacite_name": "DARIAH-DE Repository", + "official_name": "DARIAH-DE Repository" + }, + "DRYAD.DRYAD": { + "openaire_id": "re3data_____::r3d100000044", + "datacite_name": "DRYAD", + "official_name": "DRYAD" + }, + "ZBMED.DSMZ": { + "openaire_id": "re3data_____::r3d100010219", + "datacite_name": "DSMZ", + "official_name": "DSMZ" + }, + "DKRZ.ESGF": { + "openaire_id": "re3data_____::r3d100011159", + "datacite_name": "Earth System Grid Federation", + "official_name": "Earth System Grid Federation" + }, + "KTSW.AEZVVV": { + "openaire_id": "re3data_____::r3d100013469", + "datacite_name": "EarthEnv", + "official_name": "EarthEnv" + }, + "DANS.ARCHIVE": { + "openaire_id": "re3data_____::r3d100010214", + "datacite_name": "EASY", + "official_name": 
"EASY" + }, + "ETHZ.WSL": { + "openaire_id": "re3data_____::r3d100012587", + "datacite_name": "EnviDat", + "official_name": "EnviDat" + }, + "ETHZ.E-COLL": { + "openaire_id": "re3data_____::r3d100012557", + "datacite_name": "ETH Zürich Research Collection", + "official_name": "ETH Zürich Research Collection" + }, + "ETHZ.DA-RD": { + "openaire_id": "re3data_____::r3d100011626", + "datacite_name": "ETH Data Archive", + "official_name": "ETH Data Archive" + }, + "BL.ECMWF": { + "openaire_id": "re3data_____::r3d100011726", + "datacite_name": "European Centre for Medium-Range Weather Forecasts", + "official_name": "European Centre for Medium-Range Weather Forecasts" + }, + "CARL.FRDR": { + "openaire_id": "re3data_____::r3d100012646", + "datacite_name": "Federated Research Data Repository", + "official_name": "Federated Research Data Repository" + }, + "FIGSHARE.ARS": { + "openaire_id": "re3data_____::r3d100010066", + "datacite_name": "figshare", + "official_name": "figshare" + }, + "TIB.FLOSS": { + "openaire_id": "re3data_____::r3d100010863", + "datacite_name": "FLOSSmole", + "official_name": "FLOSSmole" + }, + "LXKC.DSKYFI": { + "openaire_id": "re3data_____::r3d100010976", + "datacite_name": "ForestPlots.net", + "official_name": "ForestPlots.net" + }, + "YKDK.ZUYSQI": { + "openaire_id": "re3data_____::r3d100010368", + "datacite_name": "FORS DARIS", + "official_name": "FORS DARIS" + }, + "TIB.LUIS": { + "openaire_id": "re3data_____::r3d100012825", + "datacite_name": "Forschungsdaten-Repositorium der LUH", + "official_name": "Forschungsdaten-Repositorium der LUH" + }, + "GESIS.BIBB-FDZ": { + "openaire_id": "re3data_____::r3d100010190", + "datacite_name": "Forschungsdatenzentrum im Bundesinstitut für Berufsbildung", + "official_name": "Forschungsdatenzentrum im Bundesinstitut für Berufsbildung" + }, + "GESIS.ZPID": { + "openaire_id": "re3data_____::r3d100010328", + "datacite_name": "PsychData", + "official_name": "PsychData" + }, + "TIB.GFZ": { + "openaire_id": 
"re3data_____::r3d100012335", + "datacite_name": "GFZ Data Services", + "official_name": "GFZ Data Services" + }, + "CNGB.GIGADB": { + "openaire_id": "re3data_____::r3d100010478", + "datacite_name": "GigaDB", + "official_name": "GigaDB" + }, + "GBIF.GBIF": { + "openaire_id": "re3data_____::r3d100000039", + "datacite_name": "Global Biodiversity Information Facility", + "official_name": "Global Biodiversity Information Facility" + }, + "ARDCX.GRIFFITH": { + "openaire_id": "re3data_____::r3d100010864", + "datacite_name": "Griffith University Research Data Repository", + "official_name": "Griffith University Research Data Repository" + }, + "GDCC.HARVARD-SBGR": { + "openaire_id": "re3data_____::r3d100011601", + "datacite_name": "Structural Biology Data Grid", + "official_name": "Structural Biology Data Grid" + }, + "GDCC.HARVARD-DV": { + "openaire_id": "re3data_____::r3d100010051", + "datacite_name": "Harvard Dataverse", + "official_name": "Harvard Dataverse" + }, + "CERN.HEPDATA": { + "openaire_id": "re3data_____::r3d100010081", + "datacite_name": "HEPData", + "official_name": "HEPData" + }, + "SND.ICOS": { + "openaire_id": "re3data_____::r3d100012203", + "datacite_name": "ICOS Carbon Portal", + "official_name": "ICOS Carbon Portal" + }, + "GESIS.ICPSR": { + "openaire_id": "re3data_____::r3d100010255", + "datacite_name": "Inter-university Consortium for Political and Social Research", + "official_name": "Inter-university Consortium for Political and Social Research" + }, + "IEEE.DATAPORT": { + "openaire_id": "re3data_____::r3d100012569", + "datacite_name": "IEEE DataPort", + "official_name": "IEEE DataPort" + }, + "IIASA.DARE": { + "openaire_id": "re3data_____::r3d100012932", + "datacite_name": "IIASA DARE", + "official_name": "IIASA DARE" + }, + "ILLINOIS.DATABANK": { + "openaire_id": "re3data_____::r3d100012001", + "datacite_name": "Illinois Data Bank", + "official_name": "Illinois Data Bank" + }, + "IRIS.IRIS": { + "openaire_id": "re3data_____::r3d100010268", + 
"datacite_name": "Incorporated Research Institutions for Seismology", + "official_name": "Incorporated Research Institutions for Seismology" + }, + "GESIS.INDEPTH": { + "openaire_id": "re3data_____::r3d100011392", + "datacite_name": "INDEPTH Data Repository", + "official_name": "INDEPTH Data Repository" + }, + "JCVI.GXPWAQ": { + "openaire_id": "re3data_____::r3d100011558", + "datacite_name": "Influenza Research Database", + "official_name": "Influenza Research Database" + }, + "TIB.INP": { + "openaire_id": "re3data_____::r3d100013120", + "datacite_name": "INPTDAT", + "official_name": "INPTDAT" + }, + "CERN.INSPIRE": { + "openaire_id": "re3data_____::r3d100011077", + "datacite_name": "Inspire-HEP", + "official_name": "Inspire-HEP" + }, + "INIST.IFREMER": { + "openaire_id": "re3data_____::r3d100012965", + "datacite_name": "IFREMER-SISMER Portail de données marines", + "official_name": "IFREMER-SISMER Portail de données marines" + }, + "INIST.ILL": { + "openaire_id": "re3data_____::r3d100012072", + "datacite_name": "ILL Data Portal", + "official_name": "ILL Data Portal" + }, + "TIB.KIT-IMK": { + "openaire_id": "re3data_____::r3d100011956", + "datacite_name": "CARIBIC", + "official_name": "CARIBIC" + }, + "WWPX.INTR2": { + "openaire_id": "re3data_____::r3d100012347", + "datacite_name": "²Dok[§]", + "official_name": "²Dok[§]" + }, + "BL.IITA": { + "openaire_id": "re3data_____::r3d100012883", + "datacite_name": "International Institute of Tropical Agriculture datasets", + "official_name": "International Institute of Tropical Agriculture datasets" + }, + "TIB.IPK": { + "openaire_id": "re3data_____::r3d100011647", + "datacite_name": "IPK Gatersleben", + "official_name": "IPK Gatersleben" + }, + "IST.REX": { + "openaire_id": "re3data_____::r3d100012394", + "datacite_name": "IST Austria Research Explorer", + "official_name": "IST Austria Research Explorer" + }, + "GDCC.JHU": { + "openaire_id": "re3data_____::r3d100011836", + "datacite_name": "Johns Hopkins Data Archive 
Dataverse Network", + "official_name": "Johns Hopkins Data Archive Dataverse Network" + }, + "KAGGLE.KAGGLE": { + "openaire_id": "re3data_____::r3d100012705", + "datacite_name": "Kaggle", + "official_name": "Kaggle" + }, + "ESTDOI.KEEL": { + "openaire_id": "re3data_____::r3d100011941", + "datacite_name": "Center of Estonian Language Resources", + "official_name": "Center of Estonian Language Resources" + }, + "RICE.KINDER": { + "openaire_id": "re3data_____::r3d100012884", + "datacite_name": "Kinder Institute Urban Data Platform", + "official_name": "Kinder Institute Urban Data Platform" + }, + "DELFT.KNMI": { + "openaire_id": "re3data_____::r3d100011879", + "datacite_name": "KNMI Data Platform", + "official_name": "KNMI Data Platform" + }, + "LANDCARE.SOILS": { + "openaire_id": "re3data_____::r3d100010835", + "datacite_name": "Land Resource Information Systems Portal", + "official_name": "Land Resource Information Systems Portal" + }, + "LANDCARE.GENERAL": { + "openaire_id": "re3data_____::r3d100011662", + "datacite_name": "Landcare Research Data Repository", + "official_name": "Landcare Research Data Repository" + }, + "TIB.LDEO": { + "openaire_id": "re3data_____::r3d100012547", + "datacite_name": "Lamont-Doherty Core Repository", + "official_name": "Lamont-Doherty Core Repository" + }, + "ZBMED.LERNZDB": { + "openaire_id": "re3data_____::r3d100010066", + "datacite_name": "figshare", + "official_name": "figshare" + }, + "GESIS.NEPS": { + "openaire_id": "re3data_____::r3d100010736", + "datacite_name": "Nationales Bildungspanel", + "official_name": "Nationales Bildungspanel" + }, + "BL.LINCOLN": { + "openaire_id": "re3data_____::r3d100012407", + "datacite_name": "Lincoln repository", + "official_name": "Lincoln repository" + }, + "LDC.CATALOG": { + "openaire_id": "re3data_____::r3d100011940", + "datacite_name": "Linguistic Data Consortium", + "official_name": "Linguistic Data Consortium" + }, + "ZBW.IFO": { + "openaire_id": "re3data_____::r3d100010201", + 
"datacite_name": "LMU-ifo Economics & Business Data Center", + "official_name": "LMU-ifo Economics & Business Data Center" + }, + "DK.SB": { + "openaire_id": "re3data_____::r3d100012617", + "datacite_name": "LOAR", + "official_name": "LOAR" + }, + "BL.LSHTM": { + "openaire_id": "re3data_____::r3d100011800", + "datacite_name": "LSHTM Data Compass", + "official_name": "LSHTM Data Compass" + }, + "BL.LBORO": { + "openaire_id": "re3data_____::r3d100012143", + "datacite_name": "Loughborough Data Repository", + "official_name": "Loughborough Data Repository" + }, + "DELFT.MAASTRO": { + "openaire_id": "re3data_____::r3d100011086", + "datacite_name": "CancerData.org", + "official_name": "CancerData.org" + }, + "OIBK.OHYCFA": { + "openaire_id": "re3data_____::r3d100013499", + "datacite_name": "Materials Data Repository", + "official_name": "Materials Data Repository" + }, + "MDW.REPOSITORY": { + "openaire_id": "re3data_____::r3d100012108", + "datacite_name": "mdw Repository", + "official_name": "mdw Repository" + }, + "ELSEVIER.MD": { + "openaire_id": "re3data_____::r3d100011868", + "datacite_name": "Mendeley Data", + "official_name": "Mendeley Data" + }, + "BL.MENDELEY": { + "openaire_id": "re3data_____::r3d100011868", + "datacite_name": "Mendeley Data", + "official_name": "Mendeley Data" + }, + "BKMX.AZJWZC": { + "openaire_id": "re3data_____::r3d100011394", + "datacite_name": "B2SHARE", + "official_name": "B2SHARE" + }, + "CSC.NRD": { + "openaire_id": "re3data_____::r3d100012157", + "datacite_name": "Fairdata IDA Research Data Storage Service", + "official_name": "Fairdata IDA Research Data Storage Service" + }, + "UMN.IPUMS": { + "openaire_id": "re3data_____::r3d100010794", + "datacite_name": "Minnesota Population Center", + "official_name": "Minnesota Population Center" + }, + "PHBI.REPO": { + "openaire_id": "re3data_____::r3d100010101", + "datacite_name": "MorphoBank", + "official_name": "MorphoBank" + }, + "TIB.UKON": { + "openaire_id": "re3data_____::r3d100010469", + 
"datacite_name": "Movebank Data Repository", + "official_name": "Movebank Data Repository" + }, + "INIST.HUMANUM": { + "openaire_id": "re3data_____::r3d100012102", + "datacite_name": "NAKALA", + "official_name": "NAKALA" + }, + "GDCC.NTU": { + "openaire_id": "re3data_____::r3d100012440", + "datacite_name": "DR-NTU (Data)", + "official_name": "DR-NTU (Data)" + }, + "CORNELL.NDACAN": { + "openaire_id": "re3data_____::r3d100011036", + "datacite_name": "National Data Archive on Child Abuse and Neglect", + "official_name": "National Data Archive on Child Abuse and Neglect" + }, + "NOAA.NCEI": { + "openaire_id": "re3data_____::r3d100011801", + "datacite_name": "NCEI", + "official_name": "NCEI" + }, + "GDCC.HARVARD-SLP": { + "openaire_id": "re3data_____::r3d100011861", + "datacite_name": "National Sleep Research Resource", + "official_name": "National Sleep Research Resource" + }, + "NSIDC.DATACTR": { + "openaire_id": "re3data_____::r3d100010110", + "datacite_name": "National Snow and Ice Data Center", + "official_name": "National Snow and Ice Data Center" + }, + "NUS.SB": { + "openaire_id": "re3data_____::r3d100012564", + "datacite_name": "ScholarBank@NUS", + "official_name": "ScholarBank@NUS" + }, + "BL.NHM": { + "openaire_id": "re3data_____::r3d100011675", + "datacite_name": "Natural History Museum, Data Portal", + "official_name": "Natural History Museum, Data Portal" + }, + "ESDIS.ORNL": { + "openaire_id": "re3data_____::r3d100000037", + "datacite_name": "Oak Ridge National Laboratory Distributed Active Archive Center for Biogeochemical Dynamics", + "official_name": "Oak Ridge National Laboratory Distributed Active Archive Center for Biogeochemical Dynamics" + }, + "INIST.OTELO": { + "openaire_id": "re3data_____::r3d100012505", + "datacite_name": "ORDaR", + "official_name": "ORDaR" + }, + "EUROP.ODIN": { + "openaire_id": "re3data_____::r3d100011378", + "datacite_name": "MatDB", + "official_name": "MatDB" + }, + "GDCC.ODUM-DV": { + "openaire_id": 
"re3data_____::r3d100000005", + "datacite_name": "UNC Dataverse", + "official_name": "UNC Dataverse" + }, + "OHSU.OHSU": { + "openaire_id": "re3data_____::r3d100012244", + "datacite_name": "OHSU Digital Commons", + "official_name": "OHSU Digital Commons" + }, + "KIM.OPENKIM": { + "openaire_id": "re3data_____::r3d100011864", + "datacite_name": "OpenKIM", + "official_name": "OpenKIM" + }, + "COS.OSF": { + "openaire_id": "re3data_____::r3d100011137", + "datacite_name": "Open Science Framework", + "official_name": "Open Science Framework" + }, + "SUL.OPENNEURO": { + "openaire_id": "re3data_____::r3d100010924", + "datacite_name": "OpenNeuro", + "official_name": "OpenNeuro" + }, + "BL.SHEF": { + "openaire_id": "re3data_____::r3d100012124", + "datacite_name": "ORDA - The University of Sheffield Research Data Catalogue and Repository", + "official_name": "ORDA - The University of Sheffield Research Data Catalogue and Repository" + }, + "BL.BROOKES": { + "openaire_id": "re3data_____::r3d100012929", + "datacite_name": "Oxford Brookes University: RADAR", + "official_name": "Oxford Brookes University: RADAR" + }, + "BL.OXDB": { + "openaire_id": "re3data_____::r3d100011653", + "datacite_name": "DataBank, Bodleian Libraries, University of Oxford", + "official_name": "DataBank, Bodleian Libraries, University of Oxford" + }, + "PANGAEA.REPOSITORY": { + "openaire_id": "re3data_____::r3d100010134", + "datacite_name": "PANGAEA", + "official_name": "PANGAEA" + }, + "NASAPDS.NASAPDS": { + "openaire_id": "re3data_____::r3d100010121", + "datacite_name": "PDS", + "official_name": "PDS" + }, + "BF.DISCOVER": { + "openaire_id": "re3data_____::r3d100013148", + "datacite_name": "Blackfynn Discover", + "official_name": "Blackfynn Discover" + }, + "MIT.PHYSIO": { + "openaire_id": "re3data_____::r3d100011561", + "datacite_name": "PhysioNet", + "official_name": "PhysioNet" + }, + "ZBMED.BIOFRESH": { + "openaire_id": "re3data_____::r3d100011651", + "datacite_name": "Freshwater Biodiversity Data 
Portal", + "official_name": "Freshwater Biodiversity Data Portal" + }, + "PDS.DATA": { + "openaire_id": "re3data_____::r3d100013015", + "datacite_name": "Project Data Sphere", + "official_name": "Project Data Sphere" + }, + "ESTDOI.QDB": { + "openaire_id": "re3data_____::r3d100013451", + "datacite_name": "QsarDB", + "official_name": "QsarDB" + }, + "INIST.ADISP": { + "openaire_id": "re3data_____::r3d100010494", + "datacite_name": "Quetelet PROGEDO Diffusion", + "official_name": "Quetelet PROGEDO Diffusion" + }, + "TIB.RADAR": { + "openaire_id": "re3data_____::r3d100012330", + "datacite_name": "RADAR", + "official_name": "RADAR" + }, + "UCHILE.DATAVERSE": { + "openaire_id": "re3data_____::r3d100013108", + "datacite_name": "Repositorio de Datos de Investigación de la Universidad de Chile", + "official_name": "Repositorio de Datos de Investigación de la Universidad de Chile" + }, + "UDR.RESEARCHDATA": { + "openaire_id": "re3data_____::r3d100013212", + "datacite_name": "Repositorio de datos de investigación de la Universidad del Rosario", + "official_name": "Repositorio de datos de investigación de la Universidad del Rosario" + }, + "GESIS.DIPF": { + "openaire_id": "re3data_____::r3d100010390", + "datacite_name": "Forschungsdatenzentrum Bildung", + "official_name": "Forschungsdatenzentrum Bildung" + }, + "RG.RG": { + "openaire_id": "re3data_____::r3d100012227", + "datacite_name": "ResearchGate Data", + "official_name": "ResearchGate Data" + }, + "INIST.RESIF": { + "openaire_id": "re3data_____::r3d100012222", + "datacite_name": "Résif Seismological Data Portal", + "official_name": "Résif Seismological Data Portal" + }, + "TIB.HZDR": { + "openaire_id": "re3data_____::r3d100012757", + "datacite_name": "RODARE", + "official_name": "RODARE" + }, + "OCUL.SPDV": { + "openaire_id": "re3data_____::r3d100010691", + "datacite_name": "Scholars Portal Dataverse", + "official_name": "Scholars Portal Dataverse" + }, + "PSU.SCHOLAR": { + "openaire_id": "re3data_____::r3d100010701", + 
"datacite_name": "ScholarSphere", + "official_name": "ScholarSphere" + }, + "TIB.BIKF": { + "openaire_id": "re3data_____::r3d100012379", + "datacite_name": "Senckenberg (meta) data portal", + "official_name": "Senckenberg (meta) data portal" + }, + "GESIS.SHARE": { + "openaire_id": "re3data_____::r3d100010430", + "datacite_name": "Survey of Health, Ageing and Retirement in Europe", + "official_name": "Survey of Health, Ageing and Retirement in Europe" + }, + "BL.HALLAM": { + "openaire_id": "re3data_____::r3d100011909", + "datacite_name": "Sheffield Hallam University Research Data Archive", + "official_name": "Sheffield Hallam University Research Data Archive" + }, + "ETHZ.SICAS": { + "openaire_id": "re3data_____::r3d100011560", + "datacite_name": "Sicas Medical Image Repository", + "official_name": "Sicas Medical Image Repository" + }, + "SUL.SIMTK": { + "openaire_id": "re3data_____::r3d100012486", + "datacite_name": "SimTK", + "official_name": "SimTK" + }, + "SI.SI": { + "openaire_id": "re3data_____::r3d100012274", + "datacite_name": "Smithsonian Research Online", + "official_name": "Smithsonian Research Online" + }, + "CONCOR.KCYDCU": { + "openaire_id": "re3data_____::r3d100012818", + "datacite_name": "Spectrum Research Repository", + "official_name": "Spectrum Research Repository" + }, + "SUL.SDR": { + "openaire_id": "re3data_____::r3d100010710", + "datacite_name": "Stanford Digital Repository", + "official_name": "Stanford Digital Repository" + }, + "SND.SU": { + "openaire_id": "re3data_____::r3d100012147", + "datacite_name": "Stockholm University Figshare Repository", + "official_name": "Stockholm University Figshare Repository" + }, + "INIST.CDS": { + "openaire_id": "re3data_____::r3d100010584", + "datacite_name": "Strasbourg Astronomical Data Center", + "official_name": "Strasbourg Astronomical Data Center" + }, + "DELFT.SURFSARA": { + "openaire_id": "re3data_____::r3d100013084", + "datacite_name": "SURF Data Repository", + "official_name": "SURF Data 
Repository" + }, + "SND.SMHI": { + "openaire_id": "re3data_____::r3d100011776", + "datacite_name": "Swedish Meteorological and Hydrological Institute open data", + "official_name": "Swedish Meteorological and Hydrological Institute open data" + }, + "SND.SND": { + "openaire_id": "re3data_____::r3d100010146", + "datacite_name": "Swedish National Data Service", + "official_name": "Swedish National Data Service" + }, + "SAGEBIO.SYNAPSE": { + "openaire_id": "re3data_____::r3d100011894", + "datacite_name": "Synapse", + "official_name": "Synapse" + }, + "GDCC.SYR-QDR": { + "openaire_id": "re3data_____::r3d100011038", + "datacite_name": "Qualitative Data Repository", + "official_name": "Qualitative Data Repository" + }, + "FZJ.TERENO": { + "openaire_id": "re3data_____::r3d100011471", + "datacite_name": "TERENO Data Discovery Portal", + "official_name": "TERENO Data Discovery Portal" + }, + "TUW.TETHYS": { + "openaire_id": "re3data_____::r3d100013400", + "datacite_name": "Tethys", + "official_name": "Tethys" + }, + "GESIS.AUSSDA": { + "openaire_id": "re3data_____::r3d100010483", + "datacite_name": "AUSSDA Dataverse", + "official_name": "AUSSDA Dataverse" + }, + "CCDC.CSD": { + "openaire_id": "re3data_____::r3d100010197", + "datacite_name": "The Cambridge Structural Database", + "official_name": "The Cambridge Structural Database" + }, + "SML.TCIA": { + "openaire_id": "re3data_____::r3d100011559", + "datacite_name": "The Cancer Imaging Archive", + "official_name": "The Cancer Imaging Archive" + }, + "SI.CDA": { + "openaire_id": "re3data_____::r3d100010035", + "datacite_name": "The Chandra Data Archive", + "official_name": "The Chandra Data Archive" + }, + "HLQC.ZNXELI": { + "openaire_id": "re3data_____::r3d100013029", + "datacite_name": "TUdatalib", + "official_name": "TUdatalib" + }, + "TIB.TUHH": { + "openaire_id": "re3data_____::r3d100013076", + "datacite_name": "TUHH Open Research - Research Data TUHH", + "official_name": "TUHH Open Research - Research Data TUHH" + }, + 
"BL.UEL": { + "openaire_id": "re3data_____::r3d100012414", + "datacite_name": "UEL Research Repository", + "official_name": "UEL Research Repository" + }, + "ARFM.UFZDRP": { + "openaire_id": "re3data_____::r3d100013674", + "datacite_name": "Datenrechercheportal UFZ", + "official_name": "Datenrechercheportal UFZ" + }, + "BL.UKDA": { + "openaire_id": "re3data_____::r3d100010215", + "datacite_name": "UK Data Archive", + "official_name": "UK Data Archive" + }, + "GDCC.ODUM-LIBRARY": { + "openaire_id": "re3data_____::r3d100000005", + "datacite_name": "UNC Dataverse", + "official_name": "UNC Dataverse" + }, + "CRUI.UNIBO": { + "openaire_id": "re3data_____::r3d100012604", + "datacite_name": "AMS Acta", + "official_name": "AMS Acta" + }, + "LMU.UB": { + "openaire_id": "re3data_____::r3d100010731", + "datacite_name": "Open Data LMU", + "official_name": "Open Data LMU" + }, + "INIST.IFSTTAR": { + "openaire_id": "re3data_____::r3d100013062", + "datacite_name": "Data Univ Gustave Eiffel", + "official_name": "Data Univ Gustave Eiffel" + }, + "BL.UCLD": { + "openaire_id": "re3data_____::r3d100012417", + "datacite_name": "UCL Discovery", + "official_name": "UCL Discovery" + }, + "NZAU.DATA": { + "openaire_id": "re3data_____::r3d100012110", + "datacite_name": "University of Auckland Data Repository", + "official_name": "University of Auckland Data Repository" + }, + "BL.BATH": { + "openaire_id": "re3data_____::r3d100011947", + "datacite_name": "University of Bath Research Data Archive", + "official_name": "University of Bath Research Data Archive" + }, + "BL.BRISTOL": { + "openaire_id": "re3data_____::r3d100011099", + "datacite_name": "data.bris Research Data Repository", + "official_name": "data.bris Research Data Repository" + }, + "FIGSHARE.UCT": { + "openaire_id": "re3data_____::r3d100012633", + "datacite_name": "ZivaHub", + "official_name": "ZivaHub" + }, + "BL.UCLAN": { + "openaire_id": "re3data_____::r3d100012019", + "datacite_name": "UCLanData", + "official_name": 
"UCLanData" + }, + "BL.ED": { + "openaire_id": "re3data_____::r3d100000047", + "datacite_name": "Edinburgh DataShare", + "official_name": "Edinburgh DataShare" + }, + "BL.ESSEX": { + "openaire_id": "re3data_____::r3d100012405", + "datacite_name": "Research Data at Essex", + "official_name": "Research Data at Essex" + }, + "BL.EXETER": { + "openaire_id": "re3data_____::r3d100011202", + "datacite_name": "Open Research Exeter", + "official_name": "Open Research Exeter" + }, + "BL.HERTS": { + "openaire_id": "re3data_____::r3d100013116", + "datacite_name": "University of Hertfordshire Research Archive", + "official_name": "University of Hertfordshire Research Archive" + }, + "NKN.NKN": { + "openaire_id": "re3data_____::r3d100011587", + "datacite_name": "Northwest Knowledge Network", + "official_name": "Northwest Knowledge Network" + }, + "BL.LEEDS": { + "openaire_id": "re3data_____::r3d100011945", + "datacite_name": "Research Data Leeds Repository", + "official_name": "Research Data Leeds Repository" + }, + "UNIMELB.REPO1": { + "openaire_id": "re3data_____::r3d100012145", + "datacite_name": "melbourne.figshare.com", + "official_name": "melbourne.figshare.com" + }, + "BL.READING": { + "openaire_id": "re3data_____::r3d100012064", + "datacite_name": "University of Reading Research Data Archive", + "official_name": "University of Reading Research Data Archive" + }, + "BL.SALFORD": { + "openaire_id": "re3data_____::r3d100012144", + "datacite_name": "University of Salford Data Repository", + "official_name": "University of Salford Data Repository" + }, + "BL.SOTON": { + "openaire_id": "re3data_____::r3d100011245", + "datacite_name": "University of Southampton Institutional Research Repository", + "official_name": "University of Southampton Institutional Research Repository" + }, + "ARDCX.USQ": { + "openaire_id": "re3data_____::r3d100011638", + "datacite_name": "University of Southern Queensland research data collection", + "official_name": "University of Southern Queensland 
research data collection" + }, + "BL.STANDREW": { + "openaire_id": "re3data_____::r3d100012411", + "datacite_name": "St Andrews Research portal - Research Data", + "official_name": "St Andrews Research portal - Research Data" + }, + "BL.STRATH": { + "openaire_id": "re3data_____::r3d100012412", + "datacite_name": "University of Strathclyde KnowledgeBase Datasets", + "official_name": "University of Strathclyde KnowledgeBase Datasets" + }, + "BL.SURREY": { + "openaire_id": "re3data_____::r3d100012232", + "datacite_name": "Surrey Research Insight", + "official_name": "Surrey Research Insight" + }, + "USDA.USDA": { + "openaire_id": "re3data_____::r3d100011890", + "datacite_name": "Ag Data Commons", + "official_name": "Ag Data Commons" + }, + "USGS.PROD": { + "openaire_id": "re3data_____::r3d100010054", + "datacite_name": "U.S. Geological Survey", + "official_name": "U.S. Geological Survey" + }, + "DELFT.UU": { + "openaire_id": "re3data_____::r3d100012623", + "datacite_name": "Yoda", + "official_name": "Yoda" + }, + "VT.VTECHDATA": { + "openaire_id": "re3data_____::r3d100012601", + "datacite_name": "Virginia Tech Data Repository", + "official_name": "Virginia Tech Data Repository" + }, + "JCVI.EIVBWB": { + "openaire_id": "re3data_____::r3d100011931", + "datacite_name": "Virus Pathogen Resource", + "official_name": "Virus Pathogen Resource" + }, + "VIVLI.SEARCH": { + "openaire_id": "re3data_____::r3d100012823", + "datacite_name": "Vivli", + "official_name": "Vivli" + }, + "DELFT.VLIZ": { + "openaire_id": "re3data_____::r3d100010661", + "datacite_name": "Flanders Marine Institute", + "official_name": "Flanders Marine Institute" + }, + "WH.WHOAS": { + "openaire_id": "re3data_____::r3d100010423", + "datacite_name": "Woods Hole Open Access Server", + "official_name": "Woods Hole Open Access Server" + }, + "DKRZ.WDCC": { + "openaire_id": "re3data_____::r3d100010299", + "datacite_name": "World Data Center for Climate", + "official_name": "World Data Center for Climate" + }, + 
"ETHZ.WGMS": { + "openaire_id": "re3data_____::r3d100010627", + "datacite_name": "World Glacier Monitoring Service", + "official_name": "World Glacier Monitoring Service" + }, + "ZBW.ZBW-JDA": { + "openaire_id": "re3data_____::r3d100012190", + "datacite_name": "ZBW Journal Data Archive", + "official_name": "ZBW Journal Data Archive" + }, + "CERN.ZENODO": { + "openaire_id": "re3data_____::r3d100010468", + "datacite_name": "Zenodo", + "official_name": "Zenodo" + }, + "ZBW.ZEW": { + "openaire_id": "re3data_____::r3d100010399", + "datacite_name": "ZEW Forschungsdatenzentrum", + "official_name": "ZEW Forschungsdatenzentrum" + } } \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/config-default.xml new file mode 100644 index 000000000..bdd48b0ab --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/config-default.xml @@ -0,0 +1,19 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/workflow.xml new file mode 100644 index 000000000..751b124cf --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/datacite/oozie_app/workflow.xml @@ -0,0 +1,62 @@ + + + + sourcePath + the source path of scholix graph + + + datacitePath + the datacite native path + + + workingSupportPath + the working Support path + + + isLookupUrl + The IS lookUp service endopoint + + + updateDS + false + The transformation Rule to apply + + + + + + + Action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + New Update from Datacite to Scholix + eu.dnetlib.dhp.sx.graph.SparkRetrieveDataciteDelta + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=6000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --datacitePath${datacitePath} + --masteryarn + --workingSupportPath${workingSupportPath} + --isLookupUrl${isLookupUrl} + --updateDS${updateDS} + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json new file mode 100644 index 000000000..78777ffff --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json @@ -0,0 +1,41 @@ +[ + { + "paramName": "s", + "paramLongName": "sourcePath", + "paramDescription": "the source mdstore path", + "paramRequired": true + }, + + { + "paramName": "d", + "paramLongName": "datacitePath", + "paramDescription": "the datacite native path", + "paramRequired": true + }, + + { + "paramName": "w", + "paramLongName": "workingSupportPath", + "paramDescription": "the working Support path", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "isLookupUrl", + "paramDescription": "the isLookup URL", + "paramRequired": true + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "the master name", + "paramRequired": true + }, + { + "paramName": "u", + 
"paramLongName": "updateDS", + "paramDescription": "Need to regenerate all support Dataset", + "paramRequired": false + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties b/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties index 63cba917e..81458d1f7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties +++ b/dhp-workflows/dhp-aggregation/src/main/resources/log4j.properties @@ -7,3 +7,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. log4j.appender.A1.layout=org.apache.log4j.PatternLayout log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n + +log4j.logger.org.apache.spark=FATAL +log4j.logger.org.spark_project=FATAL diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala similarity index 58% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala index 11ecfd6cb..85f5a3082 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -1,20 +1,20 @@ package eu.dnetlib.dhp.collection +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelSupport import eu.dnetlib.dhp.schema.oaf.{Oaf, OafEntity, Relation} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode} object CollectionUtils { - /** - * This method in pipeline to the transformation phase, - * generates relations in both verse, typically it should be a phase of flatMap - * - * @param i input OAF - * @return - * If the input OAF is an entity -> List(i) - * If the input OAF is a 
relation -> List(relation, inverseRelation) - * - */ + /** This method in pipeline to the transformation phase, + * generates relations in both verse, typically it should be a phase of flatMap + * + * @param i input OAF + * @return + * If the input OAF is an entity -> List(i) + * If the input OAF is a relation -> List(relation, inverseRelation) + */ def fixRelations(i: Oaf): List[Oaf] = { if (i.isInstanceOf[OafEntity]) @@ -46,4 +46,18 @@ object CollectionUtils { List() } + def saveDataset(dataset: Dataset[Oaf], targetPath: String): Unit = { + implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + val mapper = new ObjectMapper + + dataset + .flatMap(i => CollectionUtils.fixRelations(i)) + .filter(i => i != null) + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(targetPath) + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala similarity index 94% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala index 6a9b8e3e5..471149b25 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala @@ -6,7 +6,6 @@ import org.apache.http.client.methods.{HttpGet, HttpPost, HttpUriRequest} import org.apache.http.entity.StringEntity import org.apache.http.impl.client.HttpClientBuilder - abstract class AbstractRestClient extends Iterator[String] { var buffer: List[String] = List() @@ -16,12 +15,10 @@ abstract class AbstractRestClient extends Iterator[String] { var complete: Boolean = false - def extractInfo(input: String): Unit 
protected def getBufferData(): Unit - def doHTTPGETRequest(url: String): String = { val httpGet = new HttpGet(url) doHTTPRequest(httpGet) @@ -43,7 +40,6 @@ abstract class AbstractRestClient extends Iterator[String] { buffer.nonEmpty && current_index < buffer.size } - override def next(): String = { val next_item: String = buffer(current_index) current_index = current_index + 1 @@ -52,13 +48,14 @@ abstract class AbstractRestClient extends Iterator[String] { next_item } - private def doHTTPRequest[A <: HttpUriRequest](r: A): String = { val timeout = 60; // seconds - val config = RequestConfig.custom() + val config = RequestConfig + .custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() + .setSocketTimeout(timeout * 1000) + .build() val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() try { var tries = 4 @@ -69,8 +66,7 @@ abstract class AbstractRestClient extends Iterator[String] { println(s"get response with status${response.getStatusLine.getStatusCode}") if (response.getStatusLine.getStatusCode > 400) { tries -= 1 - } - else + } else return IOUtils.toString(response.getEntity.getContent) } catch { case e: Throwable => @@ -87,4 +83,4 @@ abstract class AbstractRestClient extends Iterator[String] { } getBufferData() -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala similarity index 78% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala index 7ec44a6ff..d2fd709aa 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala +++ 
b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.datacite import org.json4s.jackson.JsonMethods.{compact, parse, render} import org.json4s.{DefaultFormats, JValue} -class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until:Long = -1) extends AbstractRestClient { +class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1) extends AbstractRestClient { override def extractInfo(input: String): Unit = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -16,16 +16,18 @@ class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until:Long = - current_index = 0 } - def get_url():String ={ - val to = if (until> 0) s"$until" else "*" + def get_url(): String = { + val to = if (until > 0) s"$until" else "*" s"https://api.datacite.org/dois?page[cursor]=1&page[size]=$blocks&query=updated:[$timestamp%20TO%20$to]" } override def getBufferData(): Unit = { if (!complete) { - val response = if (scroll_value.isDefined) doHTTPGETRequest(scroll_value.get) else doHTTPGETRequest(get_url()) + val response = + if (scroll_value.isDefined) doHTTPGETRequest(scroll_value.get) + else doHTTPGETRequest(get_url()) extractInfo(response) } } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala new file mode 100644 index 000000000..a59779387 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteModelConstants.scala @@ -0,0 +1,278 @@ +package eu.dnetlib.dhp.datacite + +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.{DataInfo, KeyValue} + +import java.io.InputStream +import java.time.format.DateTimeFormatter +import 
java.util.Locale +import java.util.regex.Pattern +import scala.io.Source + +/** This class represents the data model of the input Dataset of Datacite + * @param doi THE DOI + * @param timestamp timestamp of last update date + * @param isActive the record is active or deleted + * @param json the json native records + */ +case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {} + +/* + The following classes are utility classes used for the mapping from + json datacite to OAF Schema + */ +case class RelatedIdentifierType( + relationType: String, + relatedIdentifier: String, + relatedIdentifierType: String +) {} + +case class NameIdentifiersType( + nameIdentifierScheme: Option[String], + schemeUri: Option[String], + nameIdentifier: Option[String] +) {} + +case class CreatorType( + nameType: Option[String], + nameIdentifiers: Option[List[NameIdentifiersType]], + name: Option[String], + familyName: Option[String], + givenName: Option[String], + affiliation: Option[List[String]] +) {} + +case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {} + +case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {} + +case class DescriptionType(descriptionType: Option[String], description: Option[String]) {} + +case class FundingReferenceType( + funderIdentifierType: Option[String], + awardTitle: Option[String], + awardUri: Option[String], + funderName: Option[String], + funderIdentifier: Option[String], + awardNumber: Option[String] +) {} + +case class DateType(date: Option[String], dateType: Option[String]) {} + +case class OAFRelations(relation: String, inverse: String, relType: String) + +class DataciteModelConstants extends Serializable {} + +object DataciteModelConstants { + + val REL_TYPE_VALUE: String = "resultResult" + val DATE_RELATION_KEY = "RelationDate" + val DATACITE_FILTER_PATH = "/eu/dnetlib/dhp/datacite/datacite_filter" + val DOI_CLASS = "doi" + val SUBJ_CLASS = "keywords" + val
DATACITE_NAME = "Datacite" + val dataInfo: DataInfo = dataciteDataInfo("0.9") + + val DATACITE_COLLECTED_FROM: KeyValue = + OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME) + + val subRelTypeMapping: Map[String, OAFRelations] = Map( + ModelConstants.REFERENCES -> OAFRelations( + ModelConstants.REFERENCES, + ModelConstants.IS_REFERENCED_BY, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_REFERENCED_BY -> OAFRelations( + ModelConstants.IS_REFERENCED_BY, + ModelConstants.REFERENCES, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations( + ModelConstants.IS_SUPPLEMENTED_BY, + ModelConstants.IS_SUPPLEMENT_TO, + ModelConstants.SUPPLEMENT + ), + ModelConstants.IS_SUPPLEMENT_TO -> OAFRelations( + ModelConstants.IS_SUPPLEMENT_TO, + ModelConstants.IS_SUPPLEMENTED_BY, + ModelConstants.SUPPLEMENT + ), + ModelConstants.HAS_PART -> OAFRelations( + ModelConstants.HAS_PART, + ModelConstants.IS_PART_OF, + ModelConstants.PART + ), + ModelConstants.IS_PART_OF -> OAFRelations( + ModelConstants.IS_PART_OF, + ModelConstants.HAS_PART, + ModelConstants.PART + ), + ModelConstants.IS_VERSION_OF -> OAFRelations( + ModelConstants.IS_VERSION_OF, + ModelConstants.HAS_VERSION, + ModelConstants.VERSION + ), + ModelConstants.HAS_VERSION -> OAFRelations( + ModelConstants.HAS_VERSION, + ModelConstants.IS_VERSION_OF, + ModelConstants.VERSION + ), + ModelConstants.IS_IDENTICAL_TO -> OAFRelations( + ModelConstants.IS_IDENTICAL_TO, + ModelConstants.IS_IDENTICAL_TO, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_CONTINUED_BY -> OAFRelations( + ModelConstants.IS_CONTINUED_BY, + ModelConstants.CONTINUES, + ModelConstants.RELATIONSHIP + ), + ModelConstants.CONTINUES -> OAFRelations( + ModelConstants.CONTINUES, + ModelConstants.IS_CONTINUED_BY, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_NEW_VERSION_OF -> OAFRelations( + ModelConstants.IS_NEW_VERSION_OF, + ModelConstants.IS_PREVIOUS_VERSION_OF, + ModelConstants.VERSION + ), + 
ModelConstants.IS_PREVIOUS_VERSION_OF -> OAFRelations( + ModelConstants.IS_PREVIOUS_VERSION_OF, + ModelConstants.IS_NEW_VERSION_OF, + ModelConstants.VERSION + ), + ModelConstants.IS_DOCUMENTED_BY -> OAFRelations( + ModelConstants.IS_DOCUMENTED_BY, + ModelConstants.DOCUMENTS, + ModelConstants.RELATIONSHIP + ), + ModelConstants.DOCUMENTS -> OAFRelations( + ModelConstants.DOCUMENTS, + ModelConstants.IS_DOCUMENTED_BY, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_SOURCE_OF -> OAFRelations( + ModelConstants.IS_SOURCE_OF, + ModelConstants.IS_DERIVED_FROM, + ModelConstants.VERSION + ), + ModelConstants.IS_DERIVED_FROM -> OAFRelations( + ModelConstants.IS_DERIVED_FROM, + ModelConstants.IS_SOURCE_OF, + ModelConstants.VERSION + ), + ModelConstants.CITES -> OAFRelations( + ModelConstants.CITES, + ModelConstants.IS_CITED_BY, + ModelConstants.CITATION + ), + ModelConstants.IS_CITED_BY -> OAFRelations( + ModelConstants.IS_CITED_BY, + ModelConstants.CITES, + ModelConstants.CITATION + ), + ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations( + ModelConstants.IS_VARIANT_FORM_OF, + ModelConstants.IS_DERIVED_FROM, + ModelConstants.VERSION + ), + ModelConstants.IS_OBSOLETED_BY -> OAFRelations( + ModelConstants.IS_OBSOLETED_BY, + ModelConstants.IS_NEW_VERSION_OF, + ModelConstants.VERSION + ), + ModelConstants.REVIEWS -> OAFRelations( + ModelConstants.REVIEWS, + ModelConstants.IS_REVIEWED_BY, + ModelConstants.REVIEW + ), + ModelConstants.IS_REVIEWED_BY -> OAFRelations( + ModelConstants.IS_REVIEWED_BY, + ModelConstants.REVIEWS, + ModelConstants.REVIEW + ), + ModelConstants.DOCUMENTS -> OAFRelations( + ModelConstants.DOCUMENTS, + ModelConstants.IS_DOCUMENTED_BY, + ModelConstants.RELATIONSHIP + ), + ModelConstants.IS_DOCUMENTED_BY -> OAFRelations( + ModelConstants.IS_DOCUMENTED_BY, + ModelConstants.DOCUMENTS, + ModelConstants.RELATIONSHIP + ), + ModelConstants.COMPILES -> OAFRelations( + ModelConstants.COMPILES, + ModelConstants.IS_COMPILED_BY, + ModelConstants.RELATIONSHIP + ), + 
ModelConstants.IS_COMPILED_BY -> OAFRelations( + ModelConstants.IS_COMPILED_BY, + ModelConstants.COMPILES, + ModelConstants.RELATIONSHIP + ) + ) + + val datacite_filter: List[String] = { + val stream: InputStream = getClass.getResourceAsStream(DATACITE_FILTER_PATH) + require(stream != null) + Source.fromInputStream(stream).getLines().toList + } + + def dataciteDataInfo(trust: String): DataInfo = OafMapperUtils.dataInfo( + false, + null, + false, + false, + ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, + trust + ) + + val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern( + "[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", + Locale.ENGLISH + ) + + val df_it: DateTimeFormatter = + DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN) + + val funder_regex: List[(Pattern, String)] = List( + ( + Pattern.compile( + "(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", + Pattern.MULTILINE | Pattern.CASE_INSENSITIVE + ), + "40|corda__h2020::" + ), + ( + Pattern.compile( + "(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", + Pattern.MULTILINE | Pattern.CASE_INSENSITIVE + ), + "40|corda_______::" + ) + ) + + val Date_regex: List[Pattern] = List( + //Y-M-D + Pattern.compile( + "(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", + Pattern.MULTILINE + ), + //M-D-Y + Pattern.compile( + "((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", + Pattern.MULTILINE + ), + //D-M-Y + Pattern.compile( + 
"(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", + Pattern.MULTILINE + ), + //Y + Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE) + ) + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala new file mode 100644 index 000000000..48d0f1497 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -0,0 +1,652 @@ +package eu.dnetlib.dhp.datacite + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.datacite.DataciteModelConstants._ +import eu.dnetlib.dhp.schema.action.AtomicAction +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} +import eu.dnetlib.dhp.utils.DHPUtils +import org.apache.commons.lang3.StringUtils +import org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString} +import org.json4s.jackson.JsonMethods.parse + +import java.text.SimpleDateFormat +import java.time.LocalDate +import java.time.chrono.ThaiBuddhistDate +import java.time.format.DateTimeFormatter +import java.util.{Date, Locale} +import scala.collection.JavaConverters._ +import scala.io.{Codec, Source} + +object DataciteToOAFTransformation { + + case class HostedByMapType( + openaire_id: String, + 
datacite_name: String, + official_name: String, + similarity: Option[Float] + ) {} + + val mapper = new ObjectMapper() + + val unknown_repository: HostedByMapType = HostedByMapType( + ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID, + ModelConstants.UNKNOWN_REPOSITORY.getValue, + ModelConstants.UNKNOWN_REPOSITORY.getValue, + Some(1.0f) + ) + + val hostedByMap: Map[String, HostedByMapType] = { + val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(s) + json.extract[Map[String, HostedByMapType]] + } + + /** This method should skip the record if json contains invalid text + * defined in file datacite_filter + * + * @param json + * @return True if the record should be skipped + */ + def skip_record(json: String): Boolean = { + datacite_filter.exists(f => json.contains(f)) + } + + @deprecated("this method will be removed", "dhp") + def toActionSet(item: Oaf): (String, String) = { + val mapper = new ObjectMapper() + + item match { + case dataset: OafDataset => + val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset] + a.setClazz(classOf[OafDataset]) + a.setPayload(dataset) + (dataset.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case publication: Publication => + val a: AtomicAction[Publication] = new AtomicAction[Publication] + a.setClazz(classOf[Publication]) + a.setPayload(publication) + (publication.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case software: Software => + val a: AtomicAction[Software] = new AtomicAction[Software] + a.setClazz(classOf[Software]) + a.setPayload(software) + (software.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case orp: OtherResearchProduct => + val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct] + a.setClazz(classOf[OtherResearchProduct]) + a.setPayload(orp) + (orp.getClass.getCanonicalName,
mapper.writeValueAsString(a)) + + case relation: Relation => + val a: AtomicAction[Relation] = new AtomicAction[Relation] + a.setClazz(classOf[Relation]) + a.setPayload(relation) + (relation.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case _ => + null + } + + } + + def embargo_end(embargo_end_date: String): Boolean = { + val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]")) + val td = LocalDate.now() + td.isAfter(dt) + } + + def extract_date(input: String): Option[String] = { + val d = Date_regex + .map(pattern => { + val matcher = pattern.matcher(input) + if (matcher.find()) + matcher.group(0) + else + null + }) + .find(s => s != null) + + if (d.isDefined) { + val a_date = if (d.get.length == 4) s"01-01-${d.get}" else d.get + try { + return Some(LocalDate.parse(a_date, df_en).toString) + } catch { + case _: Throwable => + try { + return Some(LocalDate.parse(a_date, df_it).toString) + } catch { + case _: Throwable => + return None + } + } + } + d + } + + def fix_thai_date(input: String, format: String): String = { + try { + val a_date = LocalDate.parse(input, DateTimeFormatter.ofPattern(format)) + val d = ThaiBuddhistDate.of(a_date.getYear, a_date.getMonth.getValue, a_date.getDayOfMonth) + LocalDate.from(d).toString + } catch { + case _: Throwable => "" + } + } + + def getTypeQualifier( + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): (Qualifier, Qualifier) = { + if (resourceType != null && resourceType.nonEmpty) { + val typeQualifier = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) + if (typeQualifier != null) + return ( + typeQualifier, + vocabularies.getSynonymAsQualifier( + ModelConstants.DNET_RESULT_TYPOLOGIES, + typeQualifier.getClassid + ) + ) + } + if (schemaOrg != null && schemaOrg.nonEmpty) { + val typeQualifier = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, 
schemaOrg) + if (typeQualifier != null) + return ( + typeQualifier, + vocabularies.getSynonymAsQualifier( + ModelConstants.DNET_RESULT_TYPOLOGIES, + typeQualifier.getClassid + ) + ) + + } + if (resourceTypeGeneral != null && resourceTypeGeneral.nonEmpty) { + val typeQualifier = vocabularies.getSynonymAsQualifier( + ModelConstants.DNET_PUBLICATION_RESOURCE, + resourceTypeGeneral + ) + if (typeQualifier != null) + return ( + typeQualifier, + vocabularies.getSynonymAsQualifier( + ModelConstants.DNET_RESULT_TYPOLOGIES, + typeQualifier.getClassid + ) + ) + + } + null + } + + def getResult( + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): Result = { + val typeQualifiers: (Qualifier, Qualifier) = + getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) + if (typeQualifiers == null) + return null + val i = new Instance + i.setInstancetype(typeQualifiers._1) + typeQualifiers._2.getClassname match { + case "dataset" => + val r = new OafDataset + r.setInstance(List(i).asJava) + return r + case "publication" => + val r = new Publication + r.setInstance(List(i).asJava) + return r + case "software" => + val r = new Software + r.setInstance(List(i).asJava) + return r + case "other" => + val r = new OtherResearchProduct + r.setInstance(List(i).asJava) + return r + } + null + } + + def available_date(input: String): Boolean = { + + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(input) + val l: List[String] = for { + JObject(dates) <- json \\ "dates" + JField("dateType", JString(dateTypes)) <- dates + } yield dateTypes + + l.exists(p => p.equalsIgnoreCase("available")) + + } + + /** As describe in ticket #6377 + * when the result come from figshare we need to remove subject + * and set Access rights OPEN. 
+ * + * @param r + */ + def fix_figshare(r: Result): Unit = { + + if (r.getInstance() != null) { + val hosted_by_figshare = r + .getInstance() + .asScala + .exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue)) + if (hosted_by_figshare) { + r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT())) + val l: List[StructuredProperty] = List() + r.setSubject(l.asJava) + } + } + + } + + def createDNetTargetIdentifier(pid: String, pidType: String, idPrefix: String): String = { + val f_part = s"$idPrefix|${pidType.toLowerCase}".padTo(15, '_') + s"$f_part::${IdentifierFactory.md5(pid.toLowerCase)}" + } + + def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = { + OafMapperUtils.structuredProperty(dt, q, null) + } + + def generateRelation( + sourceId: String, + targetId: String, + relClass: String, + cf: KeyValue, + di: DataInfo + ): Relation = { + + val r = new Relation + r.setSource(sourceId) + r.setTarget(targetId) + r.setRelType(ModelConstants.RESULT_PROJECT) + r.setRelClass(relClass) + r.setSubRelType(ModelConstants.OUTCOME) + r.setCollectedfrom(List(cf).asJava) + r.setDataInfo(di) + r + + } + + def get_projectRelation(awardUri: String, sourceId: String): List[Relation] = { + val match_pattern = funder_regex.find(s => s._1.matcher(awardUri).find()) + + if (match_pattern.isDefined) { + val m = match_pattern.get._1 + val p = match_pattern.get._2 + val grantId = m.matcher(awardUri).replaceAll("$2") + val targetId = s"$p${DHPUtils.md5(grantId)}" + List(generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo)) + } else + List() + + } + + def generateOAF( + input: String, + ts: Long, + dateOfCollection: Long, + vocabularies: VocabularyGroup, + exportLinks: Boolean + ): List[Oaf] = { + if (skip_record(input)) + return List() + + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json = parse(input) + + val resourceType = (json \ 
"attributes" \ "types" \ "resourceType").extractOrElse[String](null) + val resourceTypeGeneral = + (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null) + val schemaOrg = (json \ "attributes" \ "types" \ "schemaOrg").extractOrElse[String](null) + + val doi = (json \ "attributes" \ "doi").extract[String] + if (doi.isEmpty) + return List() + + //Mapping type based on vocabularies dnet:publication_resource and dnet:result_typologies + val result = getResult(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) + if (result == null) + return List() + + val doi_q = OafMapperUtils.qualifier( + "doi", + "doi", + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES + ) + val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo) + result.setPid(List(pid).asJava) + result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true)) + result.setOriginalId(List(doi).asJava) + + val d = new Date(dateOfCollection * 1000) + val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) + + result.setDateofcollection(ISO8601FORMAT.format(d)) + result.setDateoftransformation(ISO8601FORMAT.format(d)) + result.setDataInfo(dataInfo) + + val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List()) + + val authors = creators.zipWithIndex.map { case (c, idx) => + val a = new Author + a.setFullname(c.name.orNull) + a.setName(c.givenName.orNull) + a.setSurname(c.familyName.orNull) + if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) { + a.setPid( + c.nameIdentifiers.get + .map(ni => { + val q = + if (ni.nameIdentifierScheme.isDefined) + vocabularies.getTermAsQualifier( + ModelConstants.DNET_PID_TYPES, + ni.nameIdentifierScheme.get.toLowerCase() + ) + else null + if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) { + OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo) + } else + null + + }) + .asJava + ) + } + if 
(c.affiliation.isDefined) + a.setAffiliation( + c.affiliation.get + .filter(af => af.nonEmpty) + .map(af => OafMapperUtils.field(af, dataInfo)) + .asJava + ) + a.setRank(idx + 1) + a + } + + val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List()) + + result.setTitle( + titles + .filter(t => t.title.nonEmpty) + .map(t => { + if (t.titleType.isEmpty) { + OafMapperUtils + .structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null) + } else { + OafMapperUtils.structuredProperty( + t.title.get, + t.titleType.get, + t.titleType.get, + ModelConstants.DNET_DATACITE_TITLE, + ModelConstants.DNET_DATACITE_TITLE, + null + ) + } + }) + .asJava + ) + + if (authors == null || authors.isEmpty || !authors.exists(a => a != null)) + return List() + result.setAuthor(authors.asJava) + + val dates = (json \\ "dates").extract[List[DateType]] + val publication_year = (json \\ "publicationYear").extractOrElse[String](null) + + val i_date = dates + .filter(d => d.date.isDefined && d.dateType.isDefined) + .find(d => d.dateType.get.equalsIgnoreCase("issued")) + .map(d => extract_date(d.date.get)) + val a_date: Option[String] = dates + .filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available")) + .map(d => extract_date(d.date.get)) + .find(d => d != null && d.isDefined) + .map(d => d.get) + + if (a_date.isDefined) { + if (doi.startsWith("10.14457")) + result.setEmbargoenddate( + OafMapperUtils.field(fix_thai_date(a_date.get, "[yyyy-MM-dd]"), null) + ) + else + result.setEmbargoenddate(OafMapperUtils.field(a_date.get, null)) + } + if (i_date.isDefined && i_date.get.isDefined) { + if (doi.startsWith("10.14457")) { + result.setDateofacceptance( + OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null) + ) + result + .getInstance() + .get(0) + .setDateofacceptance( + OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null) + ) + } else { + 
result.setDateofacceptance(OafMapperUtils.field(i_date.get.get, null)) + result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(i_date.get.get, null)) + } + } else if (publication_year != null) { + if (doi.startsWith("10.14457")) { + result.setDateofacceptance( + OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null) + ) + result + .getInstance() + .get(0) + .setDateofacceptance( + OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null) + ) + + } else { + result.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) + result + .getInstance() + .get(0) + .setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) + } + } + + result.setRelevantdate( + dates + .filter(d => d.date.isDefined && d.dateType.isDefined) + .map(d => (extract_date(d.date.get), d.dateType.get)) + .filter(d => d._1.isDefined) + .map(d => + ( + d._1.get, + vocabularies.getTermAsQualifier(ModelConstants.DNET_DATACITE_DATE, d._2.toLowerCase()) + ) + ) + .filter(d => d._2 != null) + .map(d => generateOAFDate(d._1, d._2)) + .asJava + ) + + val subjects = (json \\ "subjects").extract[List[SubjectType]] + + result.setSubject( + subjects + .filter(s => s.subject.nonEmpty) + .map(s => + OafMapperUtils.structuredProperty( + s.subject.get, + SUBJ_CLASS, + SUBJ_CLASS, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + null + ) + ) + .asJava + ) + + result.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) + + val descriptions = (json \\ "descriptions").extract[List[DescriptionType]] + + result.setDescription( + descriptions + .filter(d => d.description.isDefined) + .map(d => OafMapperUtils.field(d.description.get, null)) + .filter(s => s != null) + .asJava + ) + + val publisher = (json \\ "publisher").extractOrElse[String](null) + if (publisher != null) + result.setPublisher(OafMapperUtils.field(publisher, null)) + + val language: String = (json \\ 
"language").extractOrElse[String](null) + + if (language != null) + result.setLanguage( + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, language) + ) + + val instance = result.getInstance().get(0) + + val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String] + + val accessRights: List[String] = for { + JObject(rightsList) <- json \\ "rightsList" + JField("rightsUri", JString(rightsUri)) <- rightsList + } yield rightsUri + + val aRights: Option[AccessRight] = accessRights + .map(r => { + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_ACCESS_MODES, r) + }) + .find(q => q != null) + .map(q => { + val a = new AccessRight + a.setClassid(q.getClassid) + a.setClassname(q.getClassname) + a.setSchemeid(q.getSchemeid) + a.setSchemename(q.getSchemename) + a + }) + + val access_rights_qualifier = + if (aRights.isDefined) aRights.get + else + OafMapperUtils.accessRight( + ModelConstants.UNKNOWN, + ModelConstants.NOT_AVAILABLE, + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) + + if (client.isDefined) { + + val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository) + instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name)) + + instance.setCollectedfrom(DATACITE_COLLECTED_FROM) + instance.setUrl(List(s"https://dx.doi.org/$doi").asJava) + instance.setAccessright(access_rights_qualifier) + instance.setPid(result.getPid) + val license = accessRights + .find(r => + r.startsWith("http") && r.matches( + ".*(/licenses|/publicdomain|unlicense\\.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*" + ) + ) + if (license.isDefined) + instance.setLicense(OafMapperUtils.field(license.get, null)) + } + + val awardUris: List[String] = for { + JObject(fundingReferences) <- json \\ "fundingReferences" + JField("awardUri", JString(awardUri)) <- fundingReferences + } yield awardUri + + 
result.setId(IdentifierFactory.createIdentifier(result)) + var relations: List[Relation] = + awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null) + + fix_figshare(result) + + if (result.getId == null) + return List() + + if (exportLinks) { + val rels: List[RelatedIdentifierType] = for { + JObject(relIdentifier) <- json \\ "relatedIdentifiers" + JField("relationType", JString(relationType)) <- relIdentifier + JField("relatedIdentifierType", JString(relatedIdentifierType)) <- relIdentifier + JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier + } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType) + + relations = relations ::: generateRelations( + rels, + result.getId, + if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null + ) + } + if (relations != null && relations.nonEmpty) { + List(result) ::: relations + } else + List(result) + } + + private def generateRelations( + rels: List[RelatedIdentifierType], + id: String, + date: String + ): List[Relation] = { + rels + .filter(r => + subRelTypeMapping + .contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") || + r.relatedIdentifierType.equalsIgnoreCase("pmid") || + r.relatedIdentifierType.equalsIgnoreCase("arxiv")) + ) + .map(r => { + val rel = new Relation + rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) + rel.setDataInfo(dataInfo) + + val subRelType = subRelTypeMapping(r.relationType).relType + rel.setRelType(REL_TYPE_VALUE) + rel.setSubRelType(subRelType) + rel.setRelClass(r.relationType) + + val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) + + rel.setProperties(List(dateProps).asJava) + + rel.setSource(id) + rel.setTarget( + DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType) + ) + rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) + rel.getCollectedfrom.asScala.map(c => c.getValue).toList + rel + }) + } + + def 
generateDSId(input: String): String = { + val b = StringUtils.substringBefore(input, "::") + val a = StringUtils.substringAfter(input, "::") + s"10|$b::${DHPUtils.md5(a)}" + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala new file mode 100644 index 000000000..046290969 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -0,0 +1,110 @@ +package eu.dnetlib.dhp.datacite + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.AbstractScalaApplication +import eu.dnetlib.dhp.collection.CollectionUtils +import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile +import eu.dnetlib.dhp.utils.ISLookupClientFactory +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +class GenerateDataciteDatasetSpark(propertyPath: String, args: Array[String], log: Logger) + extends AbstractScalaApplication(propertyPath, args, log: Logger) { + + /** Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + override def run(): Unit = { + + val sourcePath = parser.get("sourcePath") + log.info(s"SourcePath is '$sourcePath'") + val exportLinks = "true".equalsIgnoreCase(parser.get("exportLinks")) + log.info(s"exportLinks is '$exportLinks'") + val isLookupUrl: String = parser.get("isLookupUrl") + log.info("isLookupUrl: {}", isLookupUrl) + + val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) + val vocabularies = 
VocabularyGroup.loadVocsFromIS(isLookupService) + require(vocabularies != null) + + val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") + log.info(s"mdstoreOutputVersion is '$mdstoreOutputVersion'") + + val mapper = new ObjectMapper() + val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) + val outputBasePath = cleanedMdStoreVersion.getHdfsPath + log.info(s"outputBasePath is '$outputBasePath'") + val targetPath = s"$outputBasePath$MDSTORE_DATA_PATH" + log.info(s"targetPath is '$targetPath'") + + generateDataciteDataset(sourcePath, exportLinks, vocabularies, targetPath, spark) + + reportTotalSize(targetPath, outputBasePath) + } + + /** For working with MDStore we need to store in a file on hdfs the size of + * the current dataset + * @param targetPath + * @param outputBasePath + */ + def reportTotalSize(targetPath: String, outputBasePath: String): Unit = { + val total_items = spark.read.text(targetPath).count() + writeHdfsFile( + spark.sparkContext.hadoopConfiguration, + s"$total_items", + outputBasePath + MDSTORE_SIZE_PATH + ) + } + + /** Generate the transformed and cleaned OAF Dataset from the native one + * + * @param sourcePath sourcePath of the native Dataset in format JSON/Datacite + * @param exportLinks If true it generates unresolved links + * @param vocabularies vocabularies for cleaning + * @param targetPath the targetPath of the result Dataset + */ + def generateDataciteDataset( + sourcePath: String, + exportLinks: Boolean, + vocabularies: VocabularyGroup, + targetPath: String, + spark: SparkSession + ): Unit = { + require(spark != null) + import spark.implicits._ + + implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord] + + implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + CollectionUtils.saveDataset( + spark.read + .load(sourcePath) + .as[DataciteType] + .filter(d => d.isActive) + .flatMap(d => + DataciteToOAFTransformation + .generateOAF(d.json, d.timestamp, 
d.timestamp, vocabularies, exportLinks) + ) + .filter(d => d != null), + targetPath + ) + } + +} + +object GenerateDataciteDatasetSpark { + + val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass) + + def main(args: Array[String]): Unit = { + new GenerateDataciteDatasetSpark( + "/eu/dnetlib/dhp/datacite/generate_dataset_params.json", + args, + log + ).initialize().run() + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala similarity index 69% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala index 018b4958a..cb021925a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala @@ -22,7 +22,6 @@ object ImportDatacite { val log: Logger = LoggerFactory.getLogger(ImportDatacite.getClass) - def convertAPIStringToDataciteItem(input: String): DataciteType = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: org.json4s.JValue = parse(input) @@ -32,14 +31,26 @@ object ImportDatacite { val timestamp_string = (json \ "attributes" \ "updated").extract[String] val dt = LocalDateTime.parse(timestamp_string, ISO_DATE_TIME) - DataciteType(doi = doi, timestamp = dt.toInstant(ZoneOffset.UTC).toEpochMilli / 1000, isActive = isActive, json = input) + DataciteType( + doi = doi, + timestamp = dt.toInstant(ZoneOffset.UTC).toEpochMilli / 1000, + isActive = isActive, + json = input + ) } - def main(args: Array[String]): Unit = { - val parser = new 
ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json" + ) + ) + .mkString + ) parser.parseArgument(args) val master = parser.get("master") @@ -60,7 +71,8 @@ object ImportDatacite { val spkipImport = parser.get("skipImport") log.info(s"skipImport is $spkipImport") - val spark: SparkSession = SparkSession.builder() + val spark: SparkSession = SparkSession + .builder() .appName(ImportDatacite.getClass.getSimpleName) .master(master) .getOrCreate() @@ -78,45 +90,48 @@ object ImportDatacite { import spark.implicits._ + val dataciteAggregator: Aggregator[DataciteType, DataciteType, DataciteType] = + new Aggregator[DataciteType, DataciteType, DataciteType] with Serializable { - val dataciteAggregator: Aggregator[DataciteType, DataciteType, DataciteType] = new Aggregator[DataciteType, DataciteType, DataciteType] with Serializable { + override def zero: DataciteType = null - override def zero: DataciteType = null - - override def reduce(a: DataciteType, b: DataciteType): DataciteType = { - if (b == null) - return a - if (a == null) - return b - if (a.timestamp > b.timestamp) { - return a + override def reduce(a: DataciteType, b: DataciteType): DataciteType = { + if (b == null) + return a + if (a == null) + return b + if (a.timestamp > b.timestamp) { + return a + } + b } - b + + override def merge(a: DataciteType, b: DataciteType): DataciteType = { + reduce(a, b) + } + + override def bufferEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]] + + override def outputEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]] + + override def finish(reduction: DataciteType): DataciteType = reduction } - override def merge(a: DataciteType, b: DataciteType): DataciteType = { - reduce(a, b) - } - 
- override def bufferEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]] - - override def outputEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]] - - override def finish(reduction: DataciteType): DataciteType = reduction - } - val dump: Dataset[DataciteType] = spark.read.load(dataciteDump).as[DataciteType] val ts = dump.select(max("timestamp")).first().getLong(0) println(s"last Timestamp is $ts") - val cnt = if ("true".equalsIgnoreCase(spkipImport)) 1 else writeSequenceFile(hdfsTargetPath, ts, conf, bs) + val cnt = + if ("true".equalsIgnoreCase(spkipImport)) 1 + else writeSequenceFile(hdfsTargetPath, ts, conf, bs) println(s"Imported from Datacite API $cnt documents") if (cnt > 0) { - val inputRdd: RDD[DataciteType] = sc.sequenceFile(targetPath, classOf[Int], classOf[Text]) + val inputRdd: RDD[DataciteType] = sc + .sequenceFile(targetPath, classOf[Int], classOf[Text]) .map(s => s._2.toString) .map(s => convertAPIStringToDataciteItem(s)) spark.createDataset(inputRdd).write.mode(SaveMode.Overwrite).save(s"${targetPath}_dataset") @@ -129,7 +144,9 @@ object ImportDatacite { .agg(dataciteAggregator.toColumn) .map(s => s._2) .repartition(4000) - .write.mode(SaveMode.Overwrite).save(s"${dataciteDump}_updated") + .write + .mode(SaveMode.Overwrite) + .save(s"${dataciteDump}_updated") val fs = FileSystem.get(sc.hadoopConfiguration) fs.delete(new Path(s"$dataciteDump"), true) @@ -137,14 +154,24 @@ object ImportDatacite { } } - private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs: Int): Long = { + private def writeSequenceFile( + hdfsTargetPath: Path, + timestamp: Long, + conf: Configuration, + bs: Int + ): Long = { var from: Long = timestamp * 1000 val delta: Long = 100000000L var client: DataciteAPIImporter = null val now: Long = System.currentTimeMillis() var i = 0 try { - val writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(hdfsTargetPath), 
SequenceFile.Writer.keyClass(classOf[IntWritable]), SequenceFile.Writer.valueClass(classOf[Text])) + val writer = SequenceFile.createWriter( + conf, + SequenceFile.Writer.file(hdfsTargetPath), + SequenceFile.Writer.keyClass(classOf[IntWritable]), + SequenceFile.Writer.valueClass(classOf[Text]) + ) try { var start: Long = System.currentTimeMillis while (from < now) { @@ -153,16 +180,16 @@ object ImportDatacite { val key: IntWritable = new IntWritable(i) val value: Text = new Text while (client.hasNext) { - key.set({ + key.set { i += 1; i - 1 - }) + } value.set(client.next()) writer.append(key, value) writer.hflush() if (i % 1000 == 0) { end = System.currentTimeMillis - val time = (end - start) / 1000.0F + val time = (end - start) / 1000.0f println(s"Imported $i in $time seconds") start = System.currentTimeMillis } @@ -174,8 +201,7 @@ object ImportDatacite { case e: Throwable => println("Error", e) } finally if (writer != null) writer.close() - } - catch { + } catch { case e: Throwable => log.error("Error", e) } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala similarity index 69% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala index d46e5423d..3e61edf02 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala @@ -17,7 +17,13 @@ object SparkDownloadUpdateDatacite { def main(args: Array[String]): Unit = { val conf = new SparkConf - val parser = new 
ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json") + ) + .mkString + ) parser.parseArgument(args) val master = parser.get("master") val sourcePath = parser.get("sourcePath") @@ -26,8 +32,9 @@ object SparkDownloadUpdateDatacite { val hdfsuri = parser.get("namenode") log.info(s"namenode is $hdfsuri") - - val spark: SparkSession = SparkSession.builder().config(conf) + val spark: SparkSession = SparkSession + .builder() + .config(conf) .appName(getClass.getSimpleName) .master(master) .getOrCreate() @@ -37,13 +44,18 @@ object SparkDownloadUpdateDatacite { import spark.implicits._ - - val maxDate: String = spark.read.load(workingPath).as[Oaf].filter(s => s.isInstanceOf[Result]).map(r => r.asInstanceOf[Result].getDateofcollection).select(max("value")).first().getString(0) + val maxDate: String = spark.read + .load(workingPath) + .as[Oaf] + .filter(s => s.isInstanceOf[Result]) + .map(r => r.asInstanceOf[Result].getDateofcollection) + .select(max("value")) + .first() + .getString(0) val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) val string_to_date = ISO8601FORMAT.parse(maxDate) val ts = string_to_date.getTime - } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala new file mode 100644 index 000000000..ffdab1799 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -0,0 +1,597 @@ +package eu.dnetlib.dhp.sx.bio + +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils} +import eu.dnetlib.dhp.schema.oaf._ +import 
org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString} +import org.json4s.jackson.JsonMethods.{compact, parse, render} +import collection.JavaConverters._ + +object BioDBToOAF { + + case class EBILinkItem(id: Long, links: String) {} + + case class EBILinks( + relType: String, + date: String, + title: String, + pmid: String, + targetPid: String, + targetPidType: String, + targetUrl: String + ) {} + + case class UniprotDate(date: String, date_info: String) {} + + case class ScholixResolved( + pid: String, + pidType: String, + typology: String, + tilte: List[String], + datasource: List[String], + date: List[String], + authors: List[String] + ) {} + + val DATA_INFO: DataInfo = OafMapperUtils.dataInfo( + false, + null, + false, + false, + ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, + "0.9" + ) + val SUBJ_CLASS = "Keywords" + + val DATE_RELATION_KEY = "RelationDate" + + val resolvedURL: Map[String, String] = Map( + "genbank" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "ncbi-wgs" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "ncbi-p" -> "https://www.ncbi.nlm.nih.gov/protein/", + "ena" -> "https://www.ebi.ac.uk/ena/browser/view/", + "clinicaltrials.gov" -> "https://clinicaltrials.gov/ct2/show/", + "onim" -> "https://omim.org/entry/", + "refseq" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "geo" -> "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=" + ) + + val collectedFromMap: Map[String, KeyValue] = { + val PDBCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|opendoar____::d1c373ab1570cfb9a7dbb53c186b37a2", + "Protein Data Bank" + ) + val enaCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|re3data_____::c2a591f440598b63d854556beaf01591", + "European Nucleotide Archive" + ) + val ncbiCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|re3data_____::7d4f90870fe1e493232c9e86c43ae6f6", + "NCBI Nucleotide" + ) + val UNIPROTCollectedFrom: KeyValue = 
OafMapperUtils.keyValue( + "10|re3data_____::296e1abaf1302897a6838d3588cd0310", + "UniProtKB/Swiss-Prot" + ) + val ElsevierCollectedFrom: KeyValue = + OafMapperUtils.keyValue("10|openaire____::8f87e10869299a5fe80b315695296b88", "Elsevier") + val springerNatureCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|openaire____::6e380d9cf51138baec8480f5a0ce3a2e", + "Springer Nature" + ) + val EBICollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|opendoar____::83e60e09c222f206c725385f53d7e567c", + "EMBL-EBIs Protein Data Bank in Europe (PDBe)" + ) + val pubmedCollectedFrom: KeyValue = + OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") + + UNIPROTCollectedFrom.setDataInfo(DATA_INFO) + PDBCollectedFrom.setDataInfo(DATA_INFO) + ElsevierCollectedFrom.setDataInfo(DATA_INFO) + EBICollectedFrom.setDataInfo(DATA_INFO) + pubmedCollectedFrom.setDataInfo(DATA_INFO) + enaCollectedFrom.setDataInfo(DATA_INFO) + ncbiCollectedFrom.setDataInfo(DATA_INFO) + springerNatureCollectedFrom.setDataInfo(DATA_INFO) + + Map( + "uniprot" -> UNIPROTCollectedFrom, + "pdb" -> PDBCollectedFrom, + "elsevier" -> ElsevierCollectedFrom, + "ebi" -> EBICollectedFrom, + "Springer Nature" -> springerNatureCollectedFrom, + "NCBI Nucleotide" -> ncbiCollectedFrom, + "European Nucleotide Archive" -> enaCollectedFrom, + "Europe PMC" -> pubmedCollectedFrom + ) + } + + def crossrefLinksToOaf(input: String): Oaf = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json = parse(input) + val source_pid = (json \ "Source" \ "Identifier" \ "ID").extract[String].toLowerCase + val source_pid_type = (json \ "Source" \ "Identifier" \ "IDScheme").extract[String].toLowerCase + + val target_pid = (json \ "Target" \ "Identifier" \ "ID").extract[String].toLowerCase + val target_pid_type = (json \ "Target" \ "Identifier" \ "IDScheme").extract[String].toLowerCase + + val relation_semantic = (json \ "RelationshipType" \ "Name").extract[String] 
+ + val date = GraphCleaningFunctions.cleanDate((json \ "LinkedPublicationDate").extract[String]) + + createRelation( + target_pid, + target_pid_type, + generate_unresolved_id(source_pid, source_pid_type), + collectedFromMap("elsevier"), + "relationship", + relation_semantic, + date + ) + + } + + def scholixResolvedToOAF(input: ScholixResolved): Oaf = { + + val d = new Dataset + + d.setPid( + List( + OafMapperUtils.structuredProperty( + input.pid.toLowerCase, + input.pidType.toLowerCase, + input.pidType.toLowerCase, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + DATA_INFO + ) + ).asJava + ) + + d.setDataInfo(DATA_INFO) + + val nsPrefix = input.pidType.toLowerCase.padTo(12, '_') + d.setId(OafMapperUtils.createOpenaireId(50, s"$nsPrefix::${input.pid.toLowerCase}", true)) + + if (input.tilte != null && input.tilte.nonEmpty) + d.setTitle( + List( + OafMapperUtils.structuredProperty( + input.tilte.head, + ModelConstants.MAIN_TITLE_QUALIFIER, + DATA_INFO + ) + ).asJava + ) + + d.setOriginalId(List(input.pid).asJava) + val i = new Instance + + i.setPid(d.getPid) + + if (resolvedURL.contains(input.pidType)) { + i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava) + } + + if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) + i.setInstancetype( + OafMapperUtils.qualifier( + "0037", + "Clinical Trial", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) + else + i.setInstancetype( + OafMapperUtils.qualifier( + "0046", + "Bioentity", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) + + if (input.datasource == null || input.datasource.isEmpty) + return null + + val ds = input.datasource.head + d.setCollectedfrom(List(collectedFromMap(ds)).asJava) + i.setCollectedfrom(collectedFromMap(ds)) + d.setInstance(List(i).asJava) + + if (input.authors != null && input.authors.nonEmpty) { + val authors = input.authors.map(a => { + val authorOAF = new Author + 
authorOAF.setFullname(a) + authorOAF + }) + d.setAuthor(authors.asJava) + } + if (input.date != null && input.date.nonEmpty) { + val dt = input.date.head + i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) + d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) + } + d + } + + def uniprotToOAF(input: String): List[Oaf] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json = parse(input) + val pid = (json \ "pid").extract[String] + + val d = new Dataset + + d.setPid( + List( + OafMapperUtils.structuredProperty( + pid, + "uniprot", + "uniprot", + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + DATA_INFO + ) + ).asJava + ) + + d.setDataInfo(DATA_INFO) + d.setId(OafMapperUtils.createOpenaireId(50, s"uniprot_____::$pid", true)) + d.setCollectedfrom(List(collectedFromMap("uniprot")).asJava) + + val title: String = (json \ "title").extractOrElse[String](null) + + if (title != null) + d.setTitle( + List( + OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO) + ).asJava + ) + + d.setOriginalId(List(pid).asJava) + val i = new Instance + + i.setPid(d.getPid) + i.setUrl(List(s"https://www.uniprot.org/uniprot/$pid").asJava) + i.setInstancetype( + OafMapperUtils.qualifier( + "0046", + "Bioentity", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) + + i.setCollectedfrom(collectedFromMap("uniprot")) + d.setInstance(List(i).asJava) + + val dates: List[UniprotDate] = for { + JObject(dateOBJ) <- json \ "dates" + JField("date", JString(date)) <- dateOBJ + JField("date_info", JString(date_info)) <- dateOBJ + } yield UniprotDate(GraphCleaningFunctions.cleanDate(date), date_info) + + val subjects: List[String] = (json \\ "subjects").extractOrElse[List[String]](null) + + if (subjects != null) { + d.setSubject( + subjects + .map(s => + OafMapperUtils.structuredProperty( 
+ s, + SUBJ_CLASS, + SUBJ_CLASS, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + null + ) + ) + .asJava + ) + } + var i_date: Option[UniprotDate] = None + + if (dates.nonEmpty) { + i_date = dates.find(d => d.date_info.contains("entry version")) + if (i_date.isDefined) { + i.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) + d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) + } + val relevant_dates: List[StructuredProperty] = dates + .filter(d => !d.date_info.contains("entry version")) + .map(date => + OafMapperUtils.structuredProperty( + date.date, + ModelConstants.UNKNOWN, + ModelConstants.UNKNOWN, + ModelConstants.DNET_DATACITE_DATE, + ModelConstants.DNET_DATACITE_DATE, + DATA_INFO + ) + ) + if (relevant_dates != null && relevant_dates.nonEmpty) + d.setRelevantdate(relevant_dates.asJava) + d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) + } + + val references_pmid: List[String] = for { + JObject(reference) <- json \ "references" + JField("PubMed", JString(pid)) <- reference + } yield pid + + val references_doi: List[String] = for { + JObject(reference) <- json \ "references" + JField(" DOI", JString(pid)) <- reference + } yield pid + + if (references_pmid != null && references_pmid.nonEmpty) { + val rel = createRelation( + references_pmid.head, + "pmid", + d.getId, + collectedFromMap("uniprot"), + ModelConstants.RELATIONSHIP, + ModelConstants.IS_RELATED_TO, + if (i_date.isDefined) i_date.get.date else null + ) + rel.getCollectedfrom + List(d, rel) + } else if (references_doi != null && references_doi.nonEmpty) { + val rel = createRelation( + references_doi.head, + "doi", + d.getId, + collectedFromMap("uniprot"), + ModelConstants.RELATIONSHIP, + ModelConstants.IS_RELATED_TO, + if (i_date.isDefined) i_date.get.date else null + ) + List(d, rel) + } else + List(d) + } + + def generate_unresolved_id(pid: String, pidType: String): String = { + 
s"unresolved::$pid::$pidType" + } + + def createRelation( + pid: String, + pidType: String, + sourceId: String, + collectedFrom: KeyValue, + subRelType: String, + relClass: String, + date: String + ): Relation = { + + val rel = new Relation + rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava) + rel.setDataInfo(DATA_INFO) + + rel.setRelType(ModelConstants.RESULT_RESULT) + rel.setSubRelType(subRelType) + rel.setRelClass(relClass) + + rel.setSource(sourceId) + rel.setTarget(s"unresolved::$pid::$pidType") + + val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) + + rel.setProperties(List(dateProps).asJava) + + rel.getTarget.startsWith("unresolved") + rel.setCollectedfrom(List(collectedFrom).asJava) + rel + + } + + def createSupplementaryRelation( + pid: String, + pidType: String, + sourceId: String, + collectedFrom: KeyValue, + date: String + ): Relation = { + createRelation( + pid, + pidType, + sourceId, + collectedFrom, + ModelConstants.SUPPLEMENT, + ModelConstants.IS_SUPPLEMENT_TO, + date + ) + } + + def pdbTOOaf(input: String): List[Oaf] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json = parse(input) + val pdb = (json \ "pdb").extract[String].toLowerCase + + if (pdb.isEmpty) + return List() + + val d = new Dataset + + d.setPid( + List( + OafMapperUtils.structuredProperty( + pdb, + "pdb", + "Protein Data Bank Identifier", + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + DATA_INFO + ) + ).asJava + ) + + d.setCollectedfrom(List(collectedFromMap("pdb")).asJava) + d.setDataInfo(DATA_INFO) + d.setId(OafMapperUtils.createOpenaireId(50, s"pdb_________::$pdb", true)) + d.setOriginalId(List(pdb).asJava) + + val title = (json \ "title").extractOrElse[String](null) + + if (title == null) + return List() + d.setTitle( + List( + OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO) + ).asJava + ) + + val authors: List[String] = (json \ 
"authors").extractOrElse[List[String]](null) + + if (authors != null) { + val convertedAuthors = authors.zipWithIndex.map { a => + val res = new Author + res.setFullname(a._1) + res.setRank(a._2 + 1) + res + } + + d.setAuthor(convertedAuthors.asJava) + } + + val i = new Instance + + i.setPid(d.getPid) + i.setUrl(List(s"https://www.rcsb.org/structure/$pdb").asJava) + i.setInstancetype( + OafMapperUtils.qualifier( + "0046", + "Bioentity", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) + + i.setCollectedfrom(collectedFromMap("pdb")) + d.setInstance(List(i).asJava) + + val pmid = (json \ "pmid").extractOrElse[String](null) + + if (pmid != null) + List(d, createSupplementaryRelation(pmid, "pmid", d.getId, collectedFromMap("pdb"), null)) + else + List(d) + } + + def extractEBILinksFromDump(input: String): EBILinkItem = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json = parse(input) + + val pmid = (json \ "publication" \ "pmid").extract[String] + val links = (json \ "links").extract[JObject] + EBILinkItem(pmid.toLong, compact(render(links))) + } + + def EBITargetLinksFilter(input: EBILinks): Boolean = { + + input.targetPidType.equalsIgnoreCase("ena") || input.targetPidType.equalsIgnoreCase( + "pdb" + ) || input.targetPidType.equalsIgnoreCase("uniprot") + + } + + def parse_ebi_links(input: String): List[EBILinks] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json = parse(input) + val pmid = (json \ "request" \ "id").extract[String] + for { + JObject(link) <- json \\ "Link" + JField("Target", JObject(target)) <- link + JField("RelationshipType", JObject(relType)) <- link + JField("Name", JString(relation)) <- relType + JField("PublicationDate", JString(publicationDate)) <- link + JField("Title", JString(title)) <- target + JField("Identifier", JObject(identifier)) <- target + JField("IDScheme", JString(idScheme)) <- identifier + 
JField("IDURL", JString(idUrl)) <- identifier + JField("ID", JString(id)) <- identifier + + } yield EBILinks( + relation, + GraphCleaningFunctions.cleanDate(publicationDate), + title, + pmid, + id, + idScheme, + idUrl + ) + } + + def convertEBILinksToOaf(input: EBILinks): List[Oaf] = { + val d = new Dataset + d.setCollectedfrom(List(collectedFromMap("ebi")).asJava) + d.setDataInfo(DATA_INFO) + d.setTitle( + List( + OafMapperUtils.structuredProperty( + input.title, + ModelConstants.MAIN_TITLE_QUALIFIER, + DATA_INFO + ) + ).asJava + ) + + val nsPrefix = input.targetPidType.toLowerCase.padTo(12, '_') + + d.setId(OafMapperUtils.createOpenaireId(50, s"$nsPrefix::${input.targetPid.toLowerCase}", true)) + d.setOriginalId(List(input.targetPid.toLowerCase).asJava) + + d.setPid( + List( + OafMapperUtils.structuredProperty( + input.targetPid.toLowerCase, + input.targetPidType.toLowerCase, + "Protein Data Bank Identifier", + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + DATA_INFO + ) + ).asJava + ) + + val i = new Instance + + i.setPid(d.getPid) + i.setUrl(List(input.targetUrl).asJava) + i.setInstancetype( + OafMapperUtils.qualifier( + "0046", + "Bioentity", + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) + + i.setCollectedfrom(collectedFromMap("ebi")) + d.setInstance(List(i).asJava) + i.setDateofacceptance( + OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO) + ) + d.setDateofacceptance( + OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO) + ) + + List( + d, + createRelation( + input.pmid, + "pmid", + d.getId, + collectedFromMap("ebi"), + ModelConstants.RELATIONSHIP, + ModelConstants.IS_RELATED_TO, + GraphCleaningFunctions.cleanDate(input.date) + ) + ) + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala 
b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala similarity index 51% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index 8ae8285e3..96075b4f3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Oaf -import BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.collection.CollectionUtils +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkTransformBioDatabaseToOAF { @@ -14,7 +14,11 @@ object SparkTransformBioDatabaseToOAF { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() val log: Logger = LoggerFactory.getLogger(getClass) - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json") + ) + ) parser.parseArgument(args) val database: String = parser.get("database") log.info("database: {}", database) @@ -29,20 +33,33 @@ object SparkTransformBioDatabaseToOAF { .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + 
.master(parser.get("master")) + .getOrCreate() val sc = spark.sparkContext implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset( + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))), + targetPath + ) case "PDB" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset( + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))), + targetPath + ) case "SCHOLIX" => - spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset( + spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)), + targetPath + ) case "CROSSREF_LINKS" => - spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset( + spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))), + targetPath + ) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala similarity index 67% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala rename to 
dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 17d21f19c..9c55ec7be 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -1,9 +1,10 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.schema.oaf.Result -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} +import eu.dnetlib.dhp.sx.bio.pubmed._ import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration @@ -23,31 +24,37 @@ import scala.xml.pull.XMLEventReader object SparkCreateBaselineDataFrame { - def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = { val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") - val result = data.lines.filter(l => l.startsWith("") - val start = l.indexOf(" l.startsWith("") + val start = l.indexOf("= 0 && end > start) - l.substring(start + 9, end - start) - else - "" - }.filter(s => s.endsWith(".gz")).filter(s => s > maxFile).map(s => (s, s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")).toList + if (start >= 0 && end > start) + l.substring(start + 9, end - start) + else + "" + } + .filter(s => s.endsWith(".gz")) + .filter(s => s > maxFile) + .map(s => (s, s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")) + .toList result } - def downloadBaselinePart(url: String): InputStream = { val r = new HttpGet(url) val timeout = 60; // seconds - val config = RequestConfig.custom() + val config = RequestConfig + .custom() .setConnectTimeout(timeout * 1000) 
.setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() + .setSocketTimeout(timeout * 1000) + .build() val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() val response = client.execute(r) println(s"get response with status${response.getStatusLine.getStatusCode}") @@ -58,10 +65,12 @@ object SparkCreateBaselineDataFrame { def requestPage(url: String): String = { val r = new HttpGet(url) val timeout = 60; // seconds - val config = RequestConfig.custom() + val config = RequestConfig + .custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() + .setSocketTimeout(timeout * 1000) + .build() val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() try { var tries = 4 @@ -72,8 +81,7 @@ object SparkCreateBaselineDataFrame { println(s"get response with status${response.getStatusLine.getStatusCode}") if (response.getStatusLine.getStatusCode > 400) { tries -= 1 - } - else + } else return IOUtils.toString(response.getEntity.getContent) } catch { case e: Throwable => @@ -89,10 +97,8 @@ object SparkCreateBaselineDataFrame { } } - def downloadBaseLineUpdate(baselinePath: String, hdfsServerUri: String): Unit = { - val conf = new Configuration conf.set("fs.defaultFS", hdfsServerUri) val fs = FileSystem.get(conf) @@ -121,31 +127,36 @@ object SparkCreateBaselineDataFrame { } + val pmArticleAggregator: Aggregator[(String, PMArticle), PMArticle, PMArticle] = + new Aggregator[(String, PMArticle), PMArticle, PMArticle] with Serializable { + override def zero: PMArticle = new PMArticle - val pmArticleAggregator: Aggregator[(String, PMArticle), PMArticle, PMArticle] = new Aggregator[(String, PMArticle), PMArticle, PMArticle] with Serializable { - override def zero: PMArticle = new PMArticle + override def reduce(b: PMArticle, a: (String, PMArticle)): PMArticle = { + if (b != null && b.getPmid != null) b else a._2 + } - override def 
reduce(b: PMArticle, a: (String, PMArticle)): PMArticle = { - if (b != null && b.getPmid != null) b else a._2 + override def merge(b1: PMArticle, b2: PMArticle): PMArticle = { + if (b1 != null && b1.getPmid != null) b1 else b2 + + } + + override def finish(reduction: PMArticle): PMArticle = reduction + + override def bufferEncoder: Encoder[PMArticle] = Encoders.kryo[PMArticle] + + override def outputEncoder: Encoder[PMArticle] = Encoders.kryo[PMArticle] } - override def merge(b1: PMArticle, b2: PMArticle): PMArticle = { - if (b1 != null && b1.getPmid != null) b1 else b2 - - } - - override def finish(reduction: PMArticle): PMArticle = reduction - - override def bufferEncoder: Encoder[PMArticle] = Encoders.kryo[PMArticle] - - override def outputEncoder: Encoder[PMArticle] = Encoders.kryo[PMArticle] - } - - def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() val log: Logger = LoggerFactory.getLogger(getClass) - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkEBILinksToOaf.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkEBILinksToOaf.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json" + ) + ) + ) parser.parseArgument(args) val isLookupUrl: String = parser.get("isLookupUrl") log.info("isLookupUrl: {}", isLookupUrl) @@ -161,7 +172,6 @@ object SparkCreateBaselineDataFrame { val skipUpdate = parser.get("skipUpdate") log.info("skipUpdate: {}", skipUpdate) - val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) val spark: SparkSession = @@ -169,7 +179,8 @@ object SparkCreateBaselineDataFrame { .builder() .config(conf) .appName(SparkEBILinksToOaf.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sc = spark.sparkContext import 
spark.implicits._ @@ -177,24 +188,35 @@ object SparkCreateBaselineDataFrame { implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) - implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) if (!"true".equalsIgnoreCase(skipUpdate)) { downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline", 2000) - val ds: Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i => { - val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) - new PMParser(xml) - })) - ds.map(p => (p.getPmid, p))(Encoders.tuple(Encoders.STRING, PMEncoder)).groupByKey(_._1) + val ds: Dataset[PMArticle] = spark.createDataset( + k.filter(i => i._1.endsWith(".gz")) + .flatMap(i => { + val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) + new PMParser(xml) + }) + ) + ds.map(p => (p.getPmid, p))(Encoders.tuple(Encoders.STRING, PMEncoder)) + .groupByKey(_._1) .agg(pmArticleAggregator.toColumn) - .map(p => p._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_dataset") + .map(p => p._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/baseline_dataset") } val exported_dataset = spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle] - exported_dataset - .map(a => PubMedToOaf.convert(a, vocabularies)).as[Result] - .filter(p => p != null) - .write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset( + exported_dataset + .map(a => PubMedToOaf.convert(a, vocabularies)) + .as[Oaf] + .filter(p => p != null), + targetPath + ) + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala 
b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala similarity index 73% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index eab6b1dc6..44e9e22ea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -1,9 +1,8 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.sx.bio.pubmed.PMJournal +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.HttpGet @@ -26,10 +25,12 @@ object SparkDownloadEBILinks { def requestPage(url: String): String = { val r = new HttpGet(url) val timeout = 60; // seconds - val config = RequestConfig.custom() + val config = RequestConfig + .custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() + .setSocketTimeout(timeout * 1000) + .build() val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() try { var tries = 4 @@ -40,8 +41,7 @@ object SparkDownloadEBILinks { println(s"get response with status${response.getStatusLine.getStatusCode}") if (response.getStatusLine.getStatusCode > 400) { tries -= 1 - } - else + } else return IOUtils.toString(response.getEntity.getContent) } catch { case e: Throwable => @@ -67,14 +67,19 @@ object SparkDownloadEBILinks { val log: Logger = LoggerFactory.getLogger(getClass) val MAX_ITEM_PER_PARTITION = 20000 val 
conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_download_update.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_download_update.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkEBILinksToOaf.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() import spark.implicits._ @@ -88,22 +93,40 @@ object SparkDownloadEBILinks { log.info(s"workingPath -> $workingPath") log.info("Getting max pubmedId where the links have already requested") - val links: Dataset[EBILinkItem] = spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] + val links: Dataset[EBILinkItem] = + spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] val lastPMIDRequested = links.map(l => l.id).select(max("value")).first.getLong(0) log.info("Retrieving PMID to request links") val pubmed = spark.read.load(s"$sourcePath/baseline_dataset").as[PMArticle] - pubmed.map(p => p.getPmid.toLong).where(s"value > $lastPMIDRequested").write.mode(SaveMode.Overwrite).save(s"$workingPath/id_to_request") + pubmed + .map(p => p.getPmid.toLong) + .where(s"value > $lastPMIDRequested") + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/id_to_request") val pmidToReq: Dataset[Long] = spark.read.load(s"$workingPath/id_to_request").as[Long] val total = pmidToReq.count() - spark.createDataset(pmidToReq.rdd.repartition((total / MAX_ITEM_PER_PARTITION).toInt).map(pmid => createEBILinks(pmid)).filter(l => l != null)).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_update") + spark + .createDataset( + pmidToReq.rdd + .repartition((total / MAX_ITEM_PER_PARTITION).toInt) + .map(pmid => createEBILinks(pmid)) + .filter(l => l != null) + ) + .write + 
.mode(SaveMode.Overwrite) + .save(s"$workingPath/links_update") - val updates: Dataset[EBILinkItem] = spark.read.load(s"$workingPath/links_update").as[EBILinkItem] + val updates: Dataset[EBILinkItem] = + spark.read.load(s"$workingPath/links_update").as[EBILinkItem] - links.union(updates).groupByKey(_.id) + links + .union(updates) + .groupByKey(_.id) .reduceGroups { (x, y) => if (x == null || x.links == null) y @@ -113,6 +136,10 @@ object SparkDownloadEBILinks { x else y - }.map(_._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_final") + } + .map(_._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/links_final") } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala similarity index 61% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 8da617ca0..7cb6153ff 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -1,11 +1,10 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.sx.bio.BioDBToOAF import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -16,15 +15,19 @@ object SparkEBILinksToOaf { def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new 
ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkEBILinksToOaf.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() import spark.implicits._ val sourcePath = parser.get("sourcePath") @@ -33,12 +36,17 @@ object SparkEBILinksToOaf { log.info(s"targetPath -> $targetPath") implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - val ebLinks: Dataset[EBILinkItem] = spark.read.load(sourcePath).as[EBILinkItem].filter(l => l.links != null && l.links.startsWith("{")) + val ebLinks: Dataset[EBILinkItem] = spark.read + .load(sourcePath) + .as[EBILinkItem] + .filter(l => l.links != null && l.links.startsWith("{")) - ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) - .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) - .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) - .flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null) - .write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset( + ebLinks + .flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) + .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) + .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)), + targetPath + ) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala new file mode 100644 index 000000000..49a271641 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala @@ -0,0 +1,133 @@ +package eu.dnetlib.dhp.sx.bio.pubmed + +import scala.xml.MetaData +import 
scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader} + +/** @param xml + */ +class PMParser(xml: XMLEventReader) extends Iterator[PMArticle] { + + var currentArticle: PMArticle = generateNextArticle() + + override def hasNext: Boolean = currentArticle != null + + override def next(): PMArticle = { + val tmp = currentArticle + currentArticle = generateNextArticle() + tmp + } + + def extractAttributes(attrs: MetaData, key: String): String = { + + val res = attrs.get(key) + if (res.isDefined) { + val s = res.get + if (s != null && s.nonEmpty) + s.head.text + else + null + } else null + } + + def validate_Date(year: String, month: String, day: String): String = { + try { + f"${year.toInt}-${month.toInt}%02d-${day.toInt}%02d" + + } catch { + case _: Throwable => null + } + } + + def generateNextArticle(): PMArticle = { + + var currentSubject: PMSubject = null + var currentAuthor: PMAuthor = null + var currentJournal: PMJournal = null + var currentGrant: PMGrant = null + var currNode: String = null + var currentYear = "0" + var currentMonth = "01" + var currentDay = "01" + var currentArticleType: String = null + + while (xml.hasNext) { + xml.next match { + case EvElemStart(_, label, attrs, _) => + currNode = label + + label match { + case "PubmedArticle" => currentArticle = new PMArticle + case "Author" => currentAuthor = new PMAuthor + case "Journal" => currentJournal = new PMJournal + case "Grant" => currentGrant = new PMGrant + case "PublicationType" | "DescriptorName" => + currentSubject = new PMSubject + currentSubject.setMeshId(extractAttributes(attrs, "UI")) + case "ArticleId" => currentArticleType = extractAttributes(attrs, "IdType") + case _ => + } + case EvElemEnd(_, label) => + label match { + case "PubmedArticle" => return currentArticle + case "Author" => currentArticle.getAuthors.add(currentAuthor) + case "Journal" => currentArticle.setJournal(currentJournal) + case "Grant" => currentArticle.getGrants.add(currentGrant) + case "PubMedPubDate" => + 
if (currentArticle.getDate == null) + currentArticle.setDate(validate_Date(currentYear, currentMonth, currentDay)) + case "PubDate" => currentJournal.setDate(s"$currentYear-$currentMonth-$currentDay") + case "DescriptorName" => currentArticle.getSubjects.add(currentSubject) + case "PublicationType" => currentArticle.getPublicationTypes.add(currentSubject) + case _ => + } + case EvText(text) => + if (currNode != null && text.trim.nonEmpty) + currNode match { + case "ArticleTitle" => { + if (currentArticle.getTitle == null) + currentArticle.setTitle(text.trim) + else + currentArticle.setTitle(currentArticle.getTitle + text.trim) + } + case "AbstractText" => { + if (currentArticle.getDescription == null) + currentArticle.setDescription(text.trim) + else + currentArticle.setDescription(currentArticle.getDescription + text.trim) + } + case "PMID" => currentArticle.setPmid(text.trim) + case "ArticleId" => + if ("doi".equalsIgnoreCase(currentArticleType)) currentArticle.setDoi(text.trim) + case "Language" => currentArticle.setLanguage(text.trim) + case "ISSN" => currentJournal.setIssn(text.trim) + case "GrantID" => currentGrant.setGrantID(text.trim) + case "Agency" => currentGrant.setAgency(text.trim) + case "Country" => if (currentGrant != null) currentGrant.setCountry(text.trim) + case "Year" => currentYear = text.trim + case "Month" => currentMonth = text.trim + case "Day" => currentDay = text.trim + case "Volume" => currentJournal.setVolume(text.trim) + case "Issue" => currentJournal.setIssue(text.trim) + case "PublicationType" | "DescriptorName" => currentSubject.setValue(text.trim) + case "LastName" => { + if (currentAuthor != null) + currentAuthor.setLastName(text.trim) + } + case "ForeName" => + if (currentAuthor != null) + currentAuthor.setForeName(text.trim) + case "Title" => + if (currentJournal.getTitle == null) + currentJournal.setTitle(text.trim) + else + currentJournal.setTitle(currentJournal.getTitle + text.trim) + case _ => + + } + case _ => + } + + } + 
null + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala similarity index 61% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index ecef32202..92ad22c57 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -4,36 +4,43 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ -import scala.collection.JavaConverters._ +import collection.JavaConverters._ import java.util.regex.Pattern /** - * - */ + */ object PubMedToOaf { val SUBJ_CLASS = "keywords" + val urlMap = Map( "pmid" -> "https://pubmed.ncbi.nlm.nih.gov/", - "doi" -> "https://dx.doi.org/" + "doi" -> "https://dx.doi.org/" ) - val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") - val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") + val dataInfo: DataInfo = OafMapperUtils.dataInfo( + false, + null, + false, + false, + ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, + "0.9" + ) + val collectedFrom: KeyValue = + OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") - /** - * Cleaning the DOI Applying regex in order to - * remove doi starting with URL - * @param doi input DOI - * @return cleaned DOI - */ + /** Cleaning the DOI Applying regex in order to + * remove doi starting with URL + * 
+ * @param doi input DOI + * @return cleaned DOI + */ def cleanDoi(doi: String): String = { val regex = "^10.\\d{4,9}\\/[\\[\\]\\-\\<\\>._;()\\/:A-Z0-9]+$" - val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) val matcher = pattern.matcher(doi) @@ -43,33 +50,34 @@ object PubMedToOaf { null } - /** - * - * Create an instance of class extends Result - * starting from OAF instanceType value - * - * @param cobjQualifier OAF instance type - * @param vocabularies All dnet vocabularies - * @return the correct instance - */ + /** Create an instance of class extends Result + * starting from OAF instanceType value + * + * @param cobjQualifier OAF instance type + * @param vocabularies All dnet vocabularies + * @return the correct instance + */ def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { - val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid) + val result_typologies = getVocabularyTerm( + ModelConstants.DNET_RESULT_TYPOLOGIES, + vocabularies, + cobjQualifier.getClassid + ) result_typologies.getClassid match { - case "dataset" => new Dataset + case "dataset" => new Dataset case "publication" => new Publication - case "other" => new OtherResearchProduct - case "software" => new Software - case _ => null + case "other" => new OtherResearchProduct + case "software" => new Software + case _ => null } } - /** - * Mapping the Pubmedjournal info into the OAF Journale - * - * @param j the pubmedJournal - * @return the OAF Journal - */ + /** Mapping the Pubmedjournal info into the OAF Journale + * + * @param j the pubmedJournal + * @return the OAF Journal + */ def mapJournal(j: PMJournal): Journal = { if (j == null) return null @@ -83,42 +91,47 @@ object PubMedToOaf { journal.setIss(j.getIssue) journal - } - /** - * - * Find vocabulary term into synonyms and term in the vocabulary - * - * @param vocabularyName the input vocabulary name - * @param vocabularies all the 
vocabularies - * @param term the term to find - * - * @return the cleaned term value - */ - def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = { + /** Find vocabulary term into synonyms and term in the vocabulary + * + * @param vocabularyName the input vocabulary name + * @param vocabularies all the vocabularies + * @param term the term to find + * @return the cleaned term value + */ + def getVocabularyTerm( + vocabularyName: String, + vocabularies: VocabularyGroup, + term: String + ): Qualifier = { val a = vocabularies.getSynonymAsQualifier(vocabularyName, term) val b = vocabularies.getTermAsQualifier(vocabularyName, term) if (a == null) b else a } - - /** - * Map the Pubmed Article into the OAF instance - * - * - * @param article the pubmed articles - * @param vocabularies the vocabularies - * @return The OAF instance if the mapping did not fail - */ - def convert(article: PMArticle, vocabularies: VocabularyGroup): Result = { + /** Map the Pubmed Article into the OAF instance + * + * @param article the pubmed articles + * @param vocabularies the vocabularies + * @return The OAF instance if the mapping did not fail + */ + def convert(article: PMArticle, vocabularies: VocabularyGroup): Oaf = { if (article.getPublicationTypes == null) return null - // MAP PMID into pid with classid = classname = pmid - val pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) + val pidList: List[StructuredProperty] = List( + OafMapperUtils.structuredProperty( + article.getPmid, + PidType.pmid.toString, + PidType.pmid.toString, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + dataInfo + ) + ) if (pidList == null) return null @@ -127,7 +140,14 @@ object PubMedToOaf { if (article.getDoi != null) { val normalizedPid = cleanDoi(article.getDoi) if (normalizedPid 
!= null) - alternateIdentifier = OafMapperUtils.structuredProperty(normalizedPid, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo) + alternateIdentifier = OafMapperUtils.structuredProperty( + normalizedPid, + PidType.doi.toString, + PidType.doi.toString, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + dataInfo + ) } // INSTANCE MAPPING @@ -135,10 +155,12 @@ object PubMedToOaf { // If the article contains the typology Journal Article then we apply this type //else We have to find a terms that match the vocabulary otherwise we discard it - val ja = article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue)) + val ja = + article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue)) val pubmedInstance = new Instance if (ja.isDefined) { - val cojbCategory = getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) + val cojbCategory = + getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) pubmedInstance.setInstancetype(cojbCategory) } else { val i_type = article.getPublicationTypes.asScala @@ -157,7 +179,9 @@ object PubMedToOaf { if (alternateIdentifier != null) pubmedInstance.setAlternateIdentifier(List(alternateIdentifier).asJava) result.setInstance(List(pubmedInstance).asJava) - pubmedInstance.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection.breakOut) + pubmedInstance.getPid.asScala + .filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)) + .map(p => p.getValue)(collection.breakOut) //CREATE URL From pmid val urlLists: List[String] = pidList .map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue)) @@ -167,7 +191,9 @@ object PubMedToOaf { pubmedInstance.setUrl(urlLists.asJava) //ASSIGN DateofAcceptance - 
pubmedInstance.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) + pubmedInstance.setDateofacceptance( + OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo) + ) //ASSIGN COLLECTEDFROM pubmedInstance.setCollectedfrom(collectedFrom) result.setPid(pidList.asJava) @@ -175,7 +201,6 @@ object PubMedToOaf { //END INSTANCE MAPPING //-------------------------------------------------------------------------------------- - // JOURNAL MAPPING //-------------------------------------------------------------------------------------- if (article.getJournal != null && result.isInstanceOf[Publication]) @@ -184,32 +209,48 @@ object PubMedToOaf { //END JOURNAL MAPPING //-------------------------------------------------------------------------------------- - - // RESULT MAPPING //-------------------------------------------------------------------------------------- - result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) + result.setDateofacceptance( + OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo) + ) if (article.getTitle == null || article.getTitle.isEmpty) return null - result.setTitle(List(OafMapperUtils.structuredProperty(article.getTitle, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo)).asJava) + result.setTitle( + List( + OafMapperUtils.structuredProperty( + article.getTitle, + ModelConstants.MAIN_TITLE_QUALIFIER, + dataInfo + ) + ).asJava + ) if (article.getDescription != null && article.getDescription.nonEmpty) result.setDescription(List(OafMapperUtils.field(article.getDescription, dataInfo)).asJava) if (article.getLanguage != null) { - val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, article.getLanguage) + val term = + vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, article.getLanguage) if (term != null) result.setLanguage(term) } - - val subjects: 
List[StructuredProperty] = article.getSubjects.asScala.map(s => OafMapperUtils.structuredProperty(s.getValue, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, dataInfo))(collection.breakOut) + val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s => + OafMapperUtils.structuredProperty( + s.getValue, + SUBJ_CLASS, + SUBJ_CLASS, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + dataInfo + ) + )(collection.breakOut) if (subjects != null) result.setSubject(subjects.asJava) - val authors: List[Author] = article.getAuthors.asScala.zipWithIndex.map { case (a, index) => val author = new Author() author.setName(a.getForeName) @@ -219,15 +260,12 @@ object PubMedToOaf { author }(collection.breakOut) - if (authors != null && authors.nonEmpty) result.setAuthor(authors.asJava) result.setOriginalId(pidList.map(s => s.getValue).asJava) - result.setId(article.getPmid) - // END RESULT MAPPING //-------------------------------------------------------------------------------------- val id = IdentifierFactory.createIdentifier(result) @@ -237,5 +275,4 @@ object PubMedToOaf { result } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala new file mode 100644 index 000000000..2618d466a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkRetrieveDataciteDelta.scala @@ -0,0 +1,345 @@ +package eu.dnetlib.dhp.sx.graph + +import eu.dnetlib.dhp.application.AbstractScalaApplication +import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.datacite.{DataciteToOAFTransformation, DataciteType} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} +import 
eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixResource} +import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary +import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils +import eu.dnetlib.dhp.utils.{DHPUtils, ISLookupClientFactory} +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.spark.sql.functions.max +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ +import java.text.SimpleDateFormat + +class SparkRetrieveDataciteDelta(propertyPath: String, args: Array[String], log: Logger) + extends AbstractScalaApplication(propertyPath, args, log: Logger) { + + val ISO_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ssZ" + val simpleFormatter = new SimpleDateFormat(ISO_DATE_PATTERN) + + val SCHOLIX_RESOURCE_PATH_NAME = "scholixResource" + val DATACITE_OAF_PATH_NAME = "dataciteOAFUpdate" + val PID_MAP_PATH_NAME = "pidMap" + val RESOLVED_REL_PATH_NAME = "resolvedRelation" + val SCHOLIX_PATH_NAME = "scholix" + + def scholixResourcePath(workingPath: String) = s"$workingPath/$SCHOLIX_RESOURCE_PATH_NAME" + def dataciteOAFPath(workingPath: String) = s"$workingPath/$DATACITE_OAF_PATH_NAME" + def pidMapPath(workingPath: String) = s"$workingPath/$PID_MAP_PATH_NAME" + def resolvedRelationPath(workingPath: String) = s"$workingPath/$RESOLVED_REL_PATH_NAME" + def scholixPath(workingPath: String) = s"$workingPath/$SCHOLIX_PATH_NAME" + + /** Utility to parse Date in ISO8601 to epochMillis + * @param inputDate The String represents an input date in ISO8601 + * @return The relative epochMillis of parsed date + */ + def ISO8601toEpochMillis(inputDate: String): Long = { + simpleFormatter.parse(inputDate).getTime + } + + /** This method tries to retrieve the last collection date from all datacite + * records in HDFS. 
+ * This method should be called before indexing scholexplorer to retrieve + * the delta of Datacite record to download, since from the generation of + * raw graph to the generation of Scholexplorer sometimes it takes 20 days + * @param spark + * @param entitiesPath + * @return the last collection date from the current scholexplorer Graph of the datacite records + */ + def retrieveLastCollectedFrom(spark: SparkSession, entitiesPath: String): Long = { + log.info("Retrieve last entities collected From") + + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] + import spark.implicits._ + + val entitiesDS = spark.read + .load(s"$entitiesPath/*") + .as[Oaf] + .filter(o => o.isInstanceOf[Result]) + .map(r => r.asInstanceOf[Result]) + + val date = entitiesDS + .filter(r => r.getDateofcollection != null) + .map(_.getDateofcollection) + .select(max("value")) + .first + .getString(0) + + ISO8601toEpochMillis(date) / 1000 + } + + /** The method of update Datacite relationships on Scholexplorer + * needs some utilities data structures + * One is the scholixResource DS that stores all the nodes in the Scholix Graph + * in format ScholixResource + * @param summaryPath the path of the summary in Scholix + * @param workingPath the working path + * @param spark the spark session + */ + def generateScholixResource( + summaryPath: String, + workingPath: String, + spark: SparkSession + ): Unit = { + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + + log.info("Convert All summary to ScholixResource") + spark.read + .load(summaryPath) + .as[ScholixSummary] + .map(ScholixUtils.generateScholixResourceFromSummary)(scholixResourceEncoder) + .filter(r => r.getIdentifier != null && r.getIdentifier.size > 0) + .write + .mode(SaveMode.Overwrite) + 
.save(s"${scholixResourcePath(workingPath)}_native") + } + + /** This method convert the new Datacite Resource into Scholix Resource + * Needed to fill the source and the type of Scholix Relationships + * @param workingPath the Working Path + * @param spark The spark Session + */ + def addMissingScholixResource(workingPath: String, spark: SparkSession): Unit = { + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] + import spark.implicits._ + + spark.read + .load(dataciteOAFPath(workingPath)) + .as[Oaf] + .filter(_.isInstanceOf[Result]) + .map(_.asInstanceOf[Result]) + .map(ScholixUtils.generateScholixResourceFromResult) + .filter(r => r.getIdentifier != null && r.getIdentifier.size > 0) + .write + .mode(SaveMode.Overwrite) + .save(s"${scholixResourcePath(workingPath)}_update") + + val update = spark.read.load(s"${scholixResourcePath(workingPath)}_update").as[ScholixResource] + val native = spark.read.load(s"${scholixResourcePath(workingPath)}_native").as[ScholixResource] + val graph = update + .union(native) + .groupByKey(_.getDnetIdentifier) + .reduceGroups((a, b) => if (a != null && a.getDnetIdentifier != null) a else b) + .map(_._2) + graph.write.mode(SaveMode.Overwrite).save(s"${scholixResourcePath(workingPath)}_graph") + } + + /** This method get and Transform only datacite records with + * timestamp greater than timestamp + * @param datacitePath the datacite input Path + * @param timestamp the timestamp + * @param workingPath the working path where save the generated Dataset + * @param spark SparkSession + * @param vocabularies Vocabularies needed for transformation + */ + + def getDataciteUpdate( + datacitePath: String, + timestamp: Long, + workingPath: String, + spark: SparkSession, + vocabularies: VocabularyGroup + ): Long = { + import spark.implicits._ + val ds = 
spark.read.load(datacitePath).as[DataciteType] + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + val total = ds.filter(_.timestamp >= timestamp).count() + if (total > 0) { + ds.filter(_.timestamp >= timestamp) + .flatMap(d => + DataciteToOAFTransformation + .generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks = true) + ) + .flatMap(i => fixRelations(i)) + .filter(i => i != null) + .write + .mode(SaveMode.Overwrite) + .save(dataciteOAFPath(workingPath)) + } + total + } + + /** After added the new ScholixResource, we need to update the scholix Pid Map + * to intersected with the new Datacite Relations + * + * @param workingPath The working Path starting from save the new Map + * @param spark the spark session + */ + def generatePidMap(workingPath: String, spark: SparkSession): Unit = { + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + import spark.implicits._ + spark.read + .load(s"${scholixResourcePath(workingPath)}_graph") + .as[ScholixResource] + .flatMap(r => + r.getIdentifier.asScala + .map(i => DHPUtils.generateUnresolvedIdentifier(i.getIdentifier, i.getSchema)) + .map(t => (t, r.getDnetIdentifier)) + )(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .groupByKey(_._1) + .reduceGroups((a, b) => if (a != null && a._2 != null) a else b) + .map(_._2)(Encoders.tuple(Encoders.STRING, Encoders.STRING)) + .write + .mode(SaveMode.Overwrite) + .save(pidMapPath(workingPath)) + } + + /** This method resolve the datacite relation and filter the resolved + * relation + * @param workingPath the working path + * @param spark the spark session + */ + + def resolveUpdateRelation(workingPath: String, spark: SparkSession): Unit = { + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo[Relation] + import spark.implicits._ + + val pidMap = spark.read.load(pidMapPath(workingPath)).as[(String, String)] + + val 
unresolvedRelations: Dataset[(String, Relation)] = spark.read + .load(dataciteOAFPath(workingPath)) + .as[Oaf] + .filter(_.isInstanceOf[Relation]) + .map(_.asInstanceOf[Relation]) + .map { r => + if (r.getSource.startsWith("unresolved")) + (r.getSource, r) + else + (r.getTarget, r) + }(Encoders.tuple(Encoders.STRING, relationEncoder)) + + unresolvedRelations + .joinWith(pidMap, unresolvedRelations("_1").equalTo(pidMap("_1"))) + .map(t => { + val r = t._1._2 + val resolvedIdentifier = t._2._2 + if (r.getSource.startsWith("unresolved")) + r.setSource(resolvedIdentifier) + else + r.setTarget(resolvedIdentifier) + r + })(relationEncoder) + .filter(r => !(r.getSource.startsWith("unresolved") || r.getTarget.startsWith("unresolved"))) + .write + .mode(SaveMode.Overwrite) + .save(resolvedRelationPath(workingPath)) + } + + /** This method generate scholix starting from resolved relation + * + * @param workingPath + * @param spark + */ + def generateScholixUpdate(workingPath: String, spark: SparkSession): Unit = { + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.kryo[ScholixResource] + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo[Relation] + implicit val intermediateEncoder: Encoder[(String, Scholix)] = + Encoders.tuple(Encoders.STRING, scholixEncoder) + + val relations: Dataset[(String, Relation)] = spark.read + .load(resolvedRelationPath(workingPath)) + .as[Relation] + .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relationEncoder)) + + val id_summary: Dataset[(String, ScholixResource)] = spark.read + .load(s"${scholixResourcePath(workingPath)}_graph") + .as[ScholixResource] + .map(r => (r.getDnetIdentifier, r))(Encoders.tuple(Encoders.STRING, scholixResourceEncoder)) + + id_summary.cache() + + relations + .joinWith(id_summary, relations("_1").equalTo(id_summary("_1")), "inner") + 
.map(t => (t._1._2.getTarget, ScholixUtils.scholixFromSource(t._1._2, t._2._2))) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/scholix_one_verse") + + val source_scholix: Dataset[(String, Scholix)] = + spark.read.load(s"$workingPath/scholix_one_verse").as[(String, Scholix)] + + source_scholix + .joinWith(id_summary, source_scholix("_1").equalTo(id_summary("_1")), "inner") + .map(t => { + val target: ScholixResource = t._2._2 + val scholix: Scholix = t._1._2 + ScholixUtils.generateCompleteScholix(scholix, target) + })(scholixEncoder) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/scholix") + } + + /** Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + override def run(): Unit = { + val sourcePath = parser.get("sourcePath") + log.info(s"SourcePath is '$sourcePath'") + + val datacitePath = parser.get("datacitePath") + log.info(s"DatacitePath is '$datacitePath'") + + val workingPath = parser.get("workingSupportPath") + log.info(s"workingPath is '$workingPath'") + + val isLookupUrl: String = parser.get("isLookupUrl") + log.info("isLookupUrl: {}", isLookupUrl) + + val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) + val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) + require(vocabularies != null) + + val updateDS: Boolean = "true".equalsIgnoreCase(parser.get("updateDS")) + log.info(s"updateDS is '$updateDS'") + + var lastCollectionDate = 0L + if (updateDS) { + generateScholixResource(s"$sourcePath/provision/summaries", workingPath, spark) + log.info("Retrieve last entities collected From starting from scholix Graph") + lastCollectionDate = retrieveLastCollectedFrom(spark, s"$sourcePath/entities") + } else { + val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) + fs.delete(new Path(s"${scholixResourcePath(workingPath)}_native"), true) + fs.rename( + new Path(s"${scholixResourcePath(workingPath)}_graph"), + new 
Path(s"${scholixResourcePath(workingPath)}_native") + ) + lastCollectionDate = retrieveLastCollectedFrom(spark, dataciteOAFPath(workingPath)) + } + + val numRecords = + getDataciteUpdate(datacitePath, lastCollectionDate, workingPath, spark, vocabularies) + if (numRecords > 0) { + addMissingScholixResource(workingPath, spark) + generatePidMap(workingPath, spark) + resolveUpdateRelation(workingPath, spark) + generateScholixUpdate(workingPath, spark) + } + } +} + +object SparkRetrieveDataciteDelta { + val log: Logger = LoggerFactory.getLogger(SparkRetrieveDataciteDelta.getClass) + + def main(args: Array[String]): Unit = { + new SparkRetrieveDataciteDelta( + "/eu/dnetlib/dhp/sx/graph/retrieve_datacite_delta_params.json", + args, + log + ).initialize().run() + } +} diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md index c0c756082..6c4e05d5f 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md @@ -1,9 +1,20 @@ ##DHP-Aggregation -This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records. +This module defines a set of oozie workflows for -Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure +1. the **collection** and **transformation** of metadata records. +2. the **integration** of new external information in the result + + +### Collection and Transformation + +The workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping of each MDStore. -It defines [mappings](mappings.md) for transformation of different datasource (See mapping section). 
\ No newline at end of file +It defines [mappings](mappings.md) for transformation of different datasource (See mapping section). + +### Integration of external information in the result + +The workflows create new entities in the OpenAIRE format (OAF) whose aim is to enrich the results already contained in the graph. +See the integration section for more insight diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md new file mode 100644 index 000000000..7b763c681 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/integration.md @@ -0,0 +1,36 @@ +DHP Aggregation - Integration method +===================================== + +The integration method can be applied every time new information, which is not aggregated from the repositories +nor computed directly by OpenAIRE, should be added to the results of the graph. + +The information integrated so far is: + +1. Article impact measures + 1. [Bip!Finder](https://dl.acm.org/doi/10.1145/3357384.3357850) scores +2. Result Subjects + 1. Integration of Fields of Science and Technology ([FOS](https://www.qnrf.org/en-us/FOS)) classification in + results subjects. + + +The method always consists in the creation of a new entity in the OpenAIRE format (OAF entity) containing only the id +and the element in the OAF model that should be used to map the information we want to integrate. + +The id is set by using a particular encoding of the given PID + +*unresolved::[pid]::[pidtype]* + +where + +1. *unresolved* is a constant value +2. *pid* is the persistent id value, e.g. 10.5281/zenodo.4707307 +3. *pidtype* is the persistent id type, e.g. doi + +Such entities are matched against those available in the graph using the result.instance.pid values. + +This mechanism can be used to integrate enrichments produced in association with a given PID.
+If a match is found with one of the results already in the graph, that result is enriched with the information +present in the new OAF. +All the entities for which a match is not found are discarded. + + diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md index f6327a51b..c1813394b 100644 --- a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md @@ -4,13 +4,13 @@ This section describes the mapping implemented for [MEDLINE/PubMed](https://pubm Collection --------- The native data is collected from [ftp baseline](https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/) containing XML with -the following [shcema](https://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html) +the following [schema](https://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html) Parsing ------- -The resposible class of parsing is [PMParser](./scaladocs/#eu.dnetlib.dhp.sx.bio.pubmed.PMParser) that generates -an intermediate mapping of PubMed Article defined [here](/apidocs/eu/dnetlib/dhp/sx/bio/pubmed/package-summary.html) +The class responsible for parsing is [PMParser](/dnet-hadoop/scaladocs/#eu.dnetlib.dhp.sx.bio.pubmed.PMParser), which generates +an intermediate mapping of PubMed Article defined [here](/dnet-hadoop/apidocs/eu/dnetlib/dhp/sx/bio/pubmed/package-summary.html) Mapping @@ -50,6 +50,10 @@ The table below describes the mapping from the XML Native to the OAF mapping |//Author/FullName| author.Forename| Concatenation of forname + lastName if exist | |FOR ALL AUTHOR | author.rank| sequential number starting from 1| +#TODO + +Missing item mapped + diff --git a/dhp-workflows/dhp-aggregation/src/site/site.xml b/dhp-workflows/dhp-aggregation/src/site/site.xml index da5da0f1e..75fc5032e 100644 --- a/dhp-workflows/dhp-aggregation/src/site/site.xml +++ b/dhp-workflows/dhp-aggregation/src/site/site.xml @@ -20,7 +20,9 @@ - + + + diff
--git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java index f2158748b..be82b9fc3 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java @@ -28,6 +28,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; public class SparkAtomicActionScoreJobTest { @@ -69,13 +70,9 @@ public class SparkAtomicActionScoreJobTest { } @Test - void matchOne() throws Exception { + void testMatch() throws Exception { String bipScoresPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") - .getPath(); - String inputPath = getClass() - .getResource( - "/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json") + .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json") .getPath(); SparkAtomicActionScoreJob @@ -84,234 +81,57 @@ public class SparkAtomicActionScoreJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-inputPath", - inputPath, - "-bipScorePath", + bipScoresPath, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Publication", + "-outputPath", workingDir.toString() + "/actionSet" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - JavaRDD tmp = sc + JavaRDD tmp = sc .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) - .map(aa -> ((Publication) aa.getPayload())); + .map(aa -> ((Result) aa.getPayload())); - assertEquals(1, tmp.count()); + assertEquals(4, 
tmp.count()); - Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); - verificationDataset.createOrReplaceTempView("publication"); + Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Result.class)); + verificationDataset.createOrReplaceTempView("result"); Dataset execVerification = spark .sql( - "Select p.id oaid, mes.id, mUnit.value from publication p " + + "Select p.id oaid, mes.id, mUnit.value from result p " + "lateral view explode(measures) m as mes " + "lateral view explode(mes.unit) u as mUnit "); - Assertions.assertEquals(2, execVerification.count()); - + Assertions.assertEquals(12, execVerification.count()); Assertions .assertEquals( - "50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb", - execVerification.select("oaid").collectAsList().get(0).getString(0)); - - Assertions - .assertEquals( - "1.47565045883e-08", - execVerification.filter("id = 'influence'").select("value").collectAsList().get(0).getString(0)); - - Assertions - .assertEquals( - "0.227515392", - execVerification.filter("id = 'popularity'").select("value").collectAsList().get(0).getString(0)); - - } - - @Test - void matchOneWithTwo() throws Exception { - String bipScoresPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") - .getPath(); - String inputPath = getClass() - .getResource( - "/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json") - .getPath(); - - SparkAtomicActionScoreJob - .main( - new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-inputPath", - inputPath, - "-bipScorePath", - bipScoresPath, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Publication", - "-outputPath", - workingDir.toString() + "/actionSet" - }); - - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) - .map(value -> 
OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) - .map(aa -> ((Publication) aa.getPayload())); - - assertEquals(1, tmp.count()); - - Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); - verificationDataset.createOrReplaceTempView("publication"); - - Dataset execVerification = spark - .sql( - "Select p.id oaid, mes.id, mUnit.value from publication p " + - "lateral view explode(measures) m as mes " + - "lateral view explode(mes.unit) u as mUnit "); - - Assertions.assertEquals(4, execVerification.count()); - - Assertions - .assertEquals( - "50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb", - execVerification.select("oaid").collectAsList().get(0).getString(0)); - - Assertions - .assertEquals( - 2, - execVerification.filter("id = 'influence'").count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("id = 'popularity'").count()); - - List tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList(); - String tmp_influence = tmp_ds.get(0).getString(0); - assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); - - tmp_influence = tmp_ds.get(1).getString(0); - assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); - - assertNotEquals(tmp_ds.get(1).getString(0), tmp_ds.get(0).getString(0)); - - } - - @Test - void matchTwo() throws Exception { - String bipScoresPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") - .getPath(); - String inputPath = getClass() - .getResource( - "/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json") - .getPath(); - - SparkAtomicActionScoreJob - .main( - new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-inputPath", - inputPath, - "-bipScorePath", - bipScoresPath, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Publication", - "-outputPath", - workingDir.toString() + 
"/actionSet" - }); - - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) - .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) - .map(aa -> ((Publication) aa.getPayload())); - - assertEquals(2, tmp.count()); - - Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); - verificationDataset.createOrReplaceTempView("publication"); - - Dataset execVerification = spark - .sql( - "Select p.id oaid, mes.id, mUnit.value from publication p " + - "lateral view explode(measures) m as mes " + - "lateral view explode(mes.unit) u as mUnit "); - - Assertions.assertEquals(4, execVerification.count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb'").count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09'").count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("id = 'influence'").count()); - - Assertions - .assertEquals( - 2, - execVerification.filter("id = 'popularity'").count()); - - Assertions - .assertEquals( - "1.47565045883e-08", - execVerification + "6.63451994567e-09", execVerification .filter( - "oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " + + "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " + "and id = 'influence'") .select("value") .collectAsList() .get(0) .getString(0)); - Assertions .assertEquals( - "1.98956540239e-08", - execVerification + "0.348694533145", execVerification .filter( - "oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " + - "and id = 'influence'") + "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " + + "and id = 'popularity_alt'") .select("value") .collectAsList() .get(0) .getString(0)); - Assertions .assertEquals( - "0.282046161584", - 
execVerification + "2.16094680115e-09", execVerification .filter( - "oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " + - "and id = 'popularity'") - .select("value") - .collectAsList() - .get(0) - .getString(0)); - - Assertions - .assertEquals( - "0.227515392", - execVerification - .filter( - "oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " + + "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " + "and id = 'popularity'") .select("value") .collectAsList() diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index c48ccc8c2..cc8108bde 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -18,17 +18,14 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; -import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; import eu.dnetlib.dhp.schema.oaf.Result; public class PrepareTest { @@ -96,13 +93,18 @@ public class PrepareTest { String doi1 = "unresolved::10.0000/096020199389707::doi"; Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count()); - 
Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getMeasures().size()); + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().size()); + Assertions + .assertEquals( + 3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().get(0).getMeasures().size()); Assertions .assertEquals( "6.34596412687e-09", tmp .filter(r -> r.getId().equals(doi1)) .collect() .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .filter(sl -> sl.getId().equals("influence")) @@ -117,6 +119,8 @@ public class PrepareTest { .filter(r -> r.getId().equals(doi1)) .collect() .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .filter(sl -> sl.getId().equals("popularity_alt")) @@ -131,6 +135,8 @@ public class PrepareTest { .filter(r -> r.getId().equals(doi1)) .collect() .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .filter(sl -> sl.getId().equals("popularity")) @@ -140,34 +146,10 @@ public class PrepareTest { .get(0) .getValue()); - } + final String doi2 = "unresolved::10.3390/s18072310::doi"; - @Test - void getFOSFileTest() throws IOException, ClassNotFoundException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv") - .getPath(); - final String outputPath = workingDir.toString() + "/fos.json"; - - new GetFOSData() - .doRewrite( - sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel", - '\t', fs); - - BufferedReader in = new BufferedReader( - new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - FOSDataModel fos = new ObjectMapper().readValue(line, FOSDataModel.class); - - System.out.println(new ObjectMapper().writeValueAsString(fos)); - count += 1; - } - - assertEquals(38, count); + Assertions.assertEquals(1, tmp.filter(r -> 
r.getId().equals(doi2)).count()); + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size()); } @@ -195,15 +177,8 @@ public class PrepareTest { String doi1 = "unresolved::10.3390/s18072310::doi"; - assertEquals(50, tmp.count()); + assertEquals(20, tmp.count()); assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count()); - assertTrue( - tmp - .filter(r -> r.getId().equals(doi1)) - .flatMap(r -> r.getSubject().iterator()) - .map(sbj -> sbj.getValue()) - .collect() - .contains("engineering and technology")); assertTrue( tmp @@ -211,16 +186,16 @@ public class PrepareTest { .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("nano-technology")); + .contains("04 agricultural and veterinary sciences")); assertTrue( tmp .filter(r -> r.getId().equals(doi1)) .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("nanoscience & nanotechnology")); + .contains("0404 agricultural biotechnology")); - String doi = "unresolved::10.1111/1365-2656.12831::doi"; + String doi = "unresolved::10.1007/s11164-020-04383-6::doi"; assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); assertTrue( tmp @@ -228,7 +203,7 @@ public class PrepareTest { .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("psychology and cognitive sciences")); + .contains("01 natural sciences")); assertTrue( tmp @@ -236,15 +211,114 @@ public class PrepareTest { .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("social sciences")); - assertFalse( + .contains("0104 chemical sciences")); + assertTrue( tmp .filter(r -> r.getId().equals(doi)) .flatMap(r -> r.getSubject().iterator()) .map(sbj -> sbj.getValue()) .collect() - .contains("NULL")); + .contains("010402 general chemistry")); } + @Test + void sdgPrepareTest() throws Exception { + final String sourcePath = getClass() + 
.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json") + .getPath(); + + PrepareSDGSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + + "-outputPath", workingDir.toString() + "/work" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/work/sdg") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + String doi1 = "unresolved::10.1001/amaguidesnewsletters.2019.sepoct02::doi"; + + assertEquals(32, tmp.count()); + assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count()); + + assertTrue( + tmp + .filter(r -> r.getId().equals(doi1)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("3. Good health")); + assertTrue( + tmp + .filter(r -> r.getId().equals(doi1)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("8. 
Economic growth")); + + Assertions.assertEquals(32, tmp.filter(row -> row.getDataInfo() != null).count()); + + } + +// @Test +// void test3() throws Exception { +// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz"; +// +// final String outputPath = workingDir.toString() + "/fos.json"; +// GetFOSSparkJob +// .main( +// new String[] { +// "--isSparkSessionManaged", Boolean.FALSE.toString(), +// "--sourcePath", sourcePath, +// +// "-outputPath", outputPath +// +// }); +// +// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); +// +// JavaRDD tmp = sc +// .textFile(outputPath) +// .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); +// +// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null)); +// +// } +// +// @Test +// void test4() throws Exception { +// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz"; +// +// final String outputPath = workingDir.toString() + "/sdg.json"; +// GetSDGSparkJob +// .main( +// new String[] { +// "--isSparkSessionManaged", Boolean.FALSE.toString(), +// "--sourcePath", sourcePath, +// +// "-outputPath", outputPath +// +// }); +// +// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); +// +// JavaRDD tmp = sc +// .textFile(outputPath) +// .map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class)); +// +// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null)); +// +// } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java 
b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index b77b5bb36..a5ecaeabf 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.Constants; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; @@ -67,70 +68,12 @@ public class ProduceTest { } @Test - void produceTest() throws Exception { + void produceTestSubjects() throws Exception { - final String bipPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") - .getPath(); - - PrepareBipFinder - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", bipPath, - "--outputPath", workingDir.toString() + "/work" - - }); - final String fosPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") - .getPath(); - - PrepareFOSSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", fosPath, - "-outputPath", workingDir.toString() + "/work" - }); - - SparkSaveUnresolved.main(new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", workingDir.toString() + "/work", - - "-outputPath", workingDir.toString() + "/unresolved" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/unresolved") - .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - - Assertions.assertEquals(135, tmp.count()); - - Assertions.assertEquals(1, tmp.filter(row -> 
row.getId().equals("unresolved::10.3390/s18072310::doi")).count()); - - Assertions - .assertEquals( - 3, tmp - .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) - .collect() - .get(0) - .getSubject() - .size()); - - Assertions - .assertEquals( - 3, tmp - .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) - .collect() - .get(0) - .getMeasures() - .size()); + JavaRDD tmp = getResultJavaRDD(); List sbjs = tmp - .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) + .filter(row -> row.getSubject() != null && row.getSubject().size() > 0) .flatMap(row -> row.getSubject().iterator()) .collect(); @@ -172,14 +115,105 @@ public class ProduceTest { .assertEquals( ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemename())); + } - sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("engineering and technology")); - sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nano-technology")); - sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nanoscience & nanotechnology")); + @Test + void produceTestMeasuress() throws Exception { + + JavaRDD tmp = getResultJavaRDD(); + + List mes = tmp + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(i -> i.getMeasures().iterator()) + .flatMap(m -> m.getUnit().iterator()) + .collect(); + + mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); + mes.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); + mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible())); + mes.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); + mes.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); + mes + .forEach( + sbj -> Assertions.assertEquals("measure:bip", sbj.getDataInfo().getProvenanceaction().getClassid())); + mes + 
.forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + mes + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + mes + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); + } + + @Test + void produceTest6Subjects() throws Exception { + final String doi = "unresolved::10.3390/s18072310::doi"; + + JavaRDD tmp = getResultJavaRDD(); + + Assertions + .assertEquals( + 6, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); + + List sbjs = tmp + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); + + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); + + } + + @Test + void produceTest3Measures() throws Exception { + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDD(); + + tmp + .filter(row -> row.getId().equals(doi)) + .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + Assertions + .assertEquals( + 3, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() 
+ .get(0) + .getMeasures() + .size()); List measures = tmp - .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) - .flatMap(row -> row.getMeasures().iterator()) + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(inst -> inst.getMeasures().iterator()) .collect(); Assertions .assertEquals( @@ -216,8 +250,82 @@ public class ProduceTest { Assertions .assertEquals( - 49, tmp - .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) + "10.3390/s18072310", + tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getPid() + .get(0) + .getValue() + .toLowerCase()); + + Assertions + .assertEquals( + "doi", + tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getPid() + .get(0) + .getQualifier() + .getClassid()); + + Assertions + .assertEquals( + "Digital Object Identifier", + tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getInstance() + .get(0) + .getPid() + .get(0) + .getQualifier() + .getClassname()); + + } + + @Test + void produceTestMeasures() throws Exception { + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDD(); + + List mes = tmp + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .flatMap(row -> row.getInstance().iterator()) + .flatMap(i -> i.getPid().iterator()) + .collect(); + + Assertions.assertEquals(86, mes.size()); + + tmp + .filter(row -> row.getInstance() != null && row.getInstance().size() > 0) + .foreach( + e -> Assertions.assertEquals("sysimport:enrich", e.getDataInfo().getProvenanceaction().getClassid())); + + } + + @Test + void produceTestSomeNumbers() throws Exception { + + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDD(); + + Assertions.assertEquals(105, tmp.count()); + + Assertions.assertEquals(1, tmp.filter(row -> 
row.getId().equals(doi)).count()); + + Assertions + .assertEquals( + 19, tmp + .filter(row -> !row.getId().equals(doi)) .filter(row -> row.getSubject() != null) .count()); @@ -225,10 +333,254 @@ public class ProduceTest { .assertEquals( 85, tmp - .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) - .filter(r -> r.getMeasures() != null) + .filter(row -> !row.getId().equals(doi)) + .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) .count()); } + private JavaRDD getResultJavaRDD() throws Exception { + final String bipPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") + .getPath(); + + PrepareBipFinder + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", bipPath, + "--outputPath", workingDir.toString() + "/work" + + }); + final String fosPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") + .getPath(); + + PrepareFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", fosPath, + "-outputPath", workingDir.toString() + "/work" + }); + + SparkSaveUnresolved.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", workingDir.toString() + "/work", + + "-outputPath", workingDir.toString() + "/unresolved" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + return sc + .textFile(workingDir.toString() + "/unresolved") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + } + + @Test + void prepareTest5Subjects() throws Exception { + final String doi = "unresolved::10.1063/5.0032658::doi"; + + JavaRDD tmp = getResultJavaRDD(); + + Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); + + Assertions + .assertEquals( + 5, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); + + 
List sbjs = tmp + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0103 physical sciences"))); + + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010304 chemical physics"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0104 chemical sciences"))); + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010402 general chemistry"))); + + } + + private JavaRDD getResultJavaRDDPlusSDG() throws Exception { + final String bipPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") + .getPath(); + + PrepareBipFinder + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", bipPath, + "--outputPath", workingDir.toString() + "/work" + + }); + final String fosPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") + .getPath(); + + PrepareFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", fosPath, + "-outputPath", workingDir.toString() + "/work" + }); + + final String sdgPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json") + .getPath(); + + PrepareSDGSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sdgPath, + "-outputPath", workingDir.toString() + "/work" + }); + + SparkSaveUnresolved.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", workingDir.toString() + "/work", + + "-outputPath", workingDir.toString() + "/unresolved" + + }); + + final JavaSparkContext sc = 
JavaSparkContext.fromSparkContext(spark.sparkContext()); + + return sc + .textFile(workingDir.toString() + "/unresolved") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + } + + @Test + void produceTestSomeNumbersWithSDG() throws Exception { + + final String doi = "unresolved::10.3390/s18072310::doi"; + JavaRDD tmp = getResultJavaRDDPlusSDG(); + + Assertions.assertEquals(136, tmp.count()); + + Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); + + Assertions + .assertEquals( + 50, tmp + .filter(row -> !row.getId().equals(doi)) + .filter(row -> row.getSubject() != null) + .count()); + + Assertions + .assertEquals( + 85, + tmp + .filter(row -> !row.getId().equals(doi)) + .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) + .count()); + + } + + @Test + void produceTest7Subjects() throws Exception { + final String doi = "unresolved::10.3390/s18072310::doi"; + + JavaRDD tmp = getResultJavaRDDPlusSDG(); + + Assertions + .assertEquals( + 7, tmp + .filter(row -> row.getId().equals(doi)) + .collect() + .get(0) + .getSubject() + .size()); + + List sbjs = tmp + .filter(row -> row.getId().equals(doi)) + .flatMap(row -> row.getSubject().iterator()) + .collect(); + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences"))); + + Assertions + .assertEquals( + true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science"))); + + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences"))); + Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences"))); + Assertions + .assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics"))); + Assertions + .assertEquals(true, 
sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("1. No poverty"))); + + } + + @Test + void produceTestSubjectsWithSDG() throws Exception { + + JavaRDD tmp = getResultJavaRDDPlusSDG(); + + List sbjs_sdg = tmp + .filter(row -> row.getSubject() != null && row.getSubject().size() > 0) + .flatMap(row -> row.getSubject().iterator()) + .filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID)) + .collect(); + + sbjs_sdg.forEach(sbj -> Assertions.assertEquals("SDG", sbj.getQualifier().getClassid())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals( + "Sustainable Development Goals", sbj.getQualifier().getClassname())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); + + sbjs_sdg.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); + sbjs_sdg.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); + sbjs_sdg.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible())); + sbjs_sdg.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); + sbjs_sdg.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); + sbjs_sdg + .forEach( + sbj -> Assertions.assertEquals("subject:sdg", sbj.getDataInfo().getProvenanceaction().getClassid())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + sbjs_sdg + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); + } 
+ } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java index 5a04dcefe..3e4ce750e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java @@ -76,7 +76,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -89,17 +89,17 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet1" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet1", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); - assertEquals(60, tmp.count()); + assertEquals(62, tmp.count()); // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); @@ -110,7 +110,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -121,17 +121,17 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet2" }); final JavaSparkContext sc = new 
JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet2", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); - assertEquals(44, tmp.count()); + assertEquals(46, tmp.count()); // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); @@ -142,7 +142,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -153,13 +153,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet3" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet3", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -175,7 +175,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -186,13 +186,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet4" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet4", Text.class, Text.class) .map(value -> 
OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -215,7 +215,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -226,13 +226,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet5" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet5", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -240,8 +240,8 @@ public class CreateOpenCitationsASTest { assertEquals("citation", r.getSubRelType()); assertEquals("resultResult", r.getRelType()); }); - assertEquals(22, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); - assertEquals(22, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); + assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); + assertEquals(23, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); } @@ -250,7 +250,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -261,13 +261,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet6" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, 
Text.class) + .sequenceFile(workingDir.toString() + "/actionSet6", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); @@ -295,7 +295,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -306,13 +306,13 @@ public class CreateOpenCitationsASTest { "-inputPath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet" + workingDir.toString() + "/actionSet7" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet7", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java new file mode 100644 index 000000000..629e95c8e --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -0,0 +1,140 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import 
org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; +import eu.dnetlib.dhp.schema.oaf.Dataset; + +public class ReadCOCITest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(ReadCOCITest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(ReadCOCITest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(ReadCOCITest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(ReadCOCITest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testReadCOCI() throws Exception { + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz") + .getPath()), + 
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz")); + + ReadCOCI + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-workingPath", + workingDir.toString() + "/COCI", + "-outputPath", + workingDir.toString() + "/COCI_json/", + "-inputFile", "input1;input2;input3;input4;input5" + }); + + + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/COCI_json/*/") + .map(item -> OBJECT_MAPPER.readValue(item, COCI.class)); + + Assertions.assertEquals(24, tmp.count()); + + Assertions.assertEquals(1, tmp.filter(c -> c.getCiting().equals("10.1207/s15327647jcd3,4-01")).count()); + + Assertions.assertEquals(8, tmp.filter(c -> c.getCiting().indexOf(".refs") > -1).count()); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala deleted file 
mode 100644 index f21e9eab1..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ /dev/null @@ -1,56 +0,0 @@ -package eu.dnetlib.dhp.datacite - - -import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} -import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest -import eu.dnetlib.dhp.schema.oaf.Oaf -import org.junit.jupiter.api.extension.ExtendWith -import org.junit.jupiter.api.{BeforeEach, Test} -import org.mockito.junit.jupiter.MockitoExtension - -import java.text.SimpleDateFormat -import java.util.Locale -import scala.io.Source - -@ExtendWith(Array(classOf[MockitoExtension])) -class DataciteToOAFTest extends AbstractVocabularyTest{ - - - @BeforeEach - def setUp() :Unit = { - - super.setUpVocabulary() - } - - - @Test - def testDateMapping:Unit = { - val inputDate = "2021-07-14T11:52:54+0000" - val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) - val dt = ISO8601FORMAT.parse(inputDate) - println(dt.getTime) - - - } - - - @Test - def testMapping() :Unit = { - val record =Source.fromInputStream(getClass.getResourceAsStream("record.json")).mkString - - - - val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) - val res:List[Oaf] =DataciteToOAFTransformation.generateOAF(record, 0L,0L, vocabularies, true ) - - res.foreach(r => { - println (mapper.writeValueAsString(r)) - println("----------------------------") - - }) - - - - } - -} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala deleted file mode 100644 index 893a6e628..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ /dev/null @@ -1,190 +0,0 @@ -package eu.dnetlib.dhp.sx.bio - -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, 
SerializationFeature} -import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest -import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} -import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PubMedToOaf} -import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JField, JObject, JString} -import org.json4s.jackson.JsonMethods.parse -import org.junit.jupiter.api.Assertions._ -import org.junit.jupiter.api.extension.ExtendWith -import org.junit.jupiter.api.{BeforeEach, Test} -import org.mockito.junit.jupiter.MockitoExtension - -import java.io.{BufferedReader, InputStream, InputStreamReader} -import java.util.zip.GZIPInputStream -import scala.collection.JavaConverters._ -import scala.io.Source -import scala.xml.pull.XMLEventReader - -@ExtendWith(Array(classOf[MockitoExtension])) -class BioScholixTest extends AbstractVocabularyTest{ - - - val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false) - - @BeforeEach - def setUp() :Unit = { - - super.setUpVocabulary() - } - - class BufferedReaderIterator(reader: BufferedReader) extends Iterator[String] { - override def hasNext() = reader.ready - override def next() = reader.readLine() - } - - object GzFileIterator { - def apply(is: InputStream, encoding: String) = { - new BufferedReaderIterator( - new BufferedReader( - new InputStreamReader( - new GZIPInputStream( - is), encoding))) - } - } - - - - - @Test - def testEBIData() = { - val inputXML = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")).mkString - val xml = new XMLEventReader(Source.fromBytes(inputXML.getBytes())) - new PMParser(xml).foreach(s =>println(mapper.writeValueAsString(s))) - } - - - @Test - def testPubmedToOaf(): Unit = { - assertNotNull(vocabularies) - assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) - val records:String 
=Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump")).mkString - val r:List[Oaf] = records.lines.toList.map(s=>mapper.readValue(s, classOf[PMArticle])).map(a => PubMedToOaf.convert(a, vocabularies)) - assertEquals(10, r.size) - assertTrue(r.map(p => p.asInstanceOf[Result]).flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)).exists(p => "0037".equalsIgnoreCase(p))) - println(mapper.writeValueAsString(r.head)) - - - - } - - - @Test - def testPDBToOAF():Unit = { - - assertNotNull(vocabularies) - assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) - val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pdb_dump")).mkString - records.lines.foreach(s => assertTrue(s.nonEmpty)) - - val result:List[Oaf]= records.lines.toList.flatMap(o => BioDBToOAF.pdbTOOaf(o)) - - - - assertTrue(result.nonEmpty) - result.foreach(r => assertNotNull(r)) - - println(result.count(o => o.isInstanceOf[Relation])) - println(mapper.writeValueAsString(result.head)) - - } - - - @Test - def testUNIprotToOAF():Unit = { - - assertNotNull(vocabularies) - assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) - - val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump")).mkString - records.lines.foreach(s => assertTrue(s.nonEmpty)) - - val result:List[Oaf]= records.lines.toList.flatMap(o => BioDBToOAF.uniprotToOAF(o)) - - - - assertTrue(result.nonEmpty) - result.foreach(r => assertNotNull(r)) - - println(result.count(o => o.isInstanceOf[Relation])) - println(mapper.writeValueAsString(result.head)) - - } - - case class EBILinks(relType:String, date:String, title:String, pmid:String, targetPid:String, targetPidType:String) {} - - def parse_ebi_links(input:String):List[EBILinks] ={ - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json = parse(input) - val pmid 
= (json \ "publication" \"pmid").extract[String] - for { - JObject(link) <- json \\ "Link" - JField("Target",JObject(target)) <- link - JField("RelationshipType",JObject(relType)) <- link - JField("Name", JString(relation)) <- relType - JField("PublicationDate",JString(publicationDate)) <- link - JField("Title", JString(title)) <- target - JField("Identifier",JObject(identifier)) <- target - JField("IDScheme", JString(idScheme)) <- identifier - JField("ID", JString(id)) <- identifier - - } yield EBILinks(relation, publicationDate, title, pmid, id, idScheme) - } - - - @Test - def testCrossrefLinksToOAF():Unit = { - - val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/crossref_links")).mkString - records.lines.foreach(s => assertTrue(s.nonEmpty)) - - - val result:List[Oaf] =records.lines.map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList - - assertNotNull(result) - assertTrue(result.nonEmpty) - - println(mapper.writeValueAsString(result.head)) - - } - - @Test - def testEBILinksToOAF():Unit = { - val iterator = GzFileIterator(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz"), "UTF-8") - val data = iterator.next() - - val res = BioDBToOAF.parse_ebi_links(BioDBToOAF.extractEBILinksFromDump(data).links).filter(BioDBToOAF.EBITargetLinksFilter).flatMap(BioDBToOAF.convertEBILinksToOaf) - print(res.length) - - - println(mapper.writeValueAsString(res.head)) - - } - - - - - @Test - def scholixResolvedToOAF():Unit ={ - - val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved")).mkString - records.lines.foreach(s => assertTrue(s.nonEmpty)) - - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - - val l:List[ScholixResolved] = records.lines.map{input => - lazy val json = parse(input) - json.extract[ScholixResolved] - }.toList - - - val result:List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s)) - - 
assertTrue(result.nonEmpty) - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json deleted file mode 100644 index a4f3fa8b8..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json +++ /dev/null @@ -1,1101 +0,0 @@ -{"10.1001/10-v4n2-hsf10003": [{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}]} -{"10.1001/2012.jama.10456": [{"id": "influence", "unit": [{"value": "1.66083991022e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8690688", "key": "score"}]}]} -{"10.1001/2013.jamadermatol.729": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0279936", "key": "score"}]}]} -{"10.1001/archderm.107.6.853": [{"id": "influence", "unit": [{"value": "1.95361113607e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.600657255944", "key": "score"}]}]} -{"10.1001/archderm.1926.02370130020002": [{"id": "influence", "unit": [{"value": "1.48611319422e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.20903253662e-16", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.157": [{"id": "influence", "unit": [{"value": "1.42089114725e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1575936", "key": "score"}]}]} -{"10.1002/bjs.1800830139": [{"id": "influence", "unit": [{"value": "3.73834652128e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.73174997252", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.013": [{"id": "influence", "unit": [{"value": "1.54849335991e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4885963776", "key": "score"}]}]} 
-{"10.1016/j.bulcan.2015.02.014": [{"id": "influence", "unit": [{"value": "1.43539656097e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.014": [{"id": "influence", "unit": [{"value": "1.65673923548e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.61188397056", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.015": [{"id": "influence", "unit": [{"value": "1.4250528394e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.05028397056", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.02.016": [{"id": "influence", "unit": [{"value": "1.41753303696e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.42336", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.004": [{"id": "influence", "unit": [{"value": "2.40827378083e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3699383296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.016": [{"id": "influence", "unit": [{"value": "2.54472083616e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.12517350811", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.005": [{"id": "influence", "unit": [{"value": "1.40543881698e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.005": [{"id": "influence", "unit": [{"value": "4.98246170342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "13.4951290928", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.017": [{"id": "influence", "unit": [{"value": "2.8107768688e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.99055570262", "key": "score"}]}]} -{"10.1002/bjs.1800770731": [{"id": "influence", "unit": [{"value": "2.12334018404e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.37522926873", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.018": [{"id": "influence", "unit": [{"value": "2.56362457912e-08", "key": 
"score"}]}, {"id": "popularity", "unit": [{"value": "1.99687237632", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.006": [{"id": "influence", "unit": [{"value": "1.75711526235e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.125908992", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.019": [{"id": "influence", "unit": [{"value": "2.63339070632e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.83961125752", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.020": [{"id": "influence", "unit": [{"value": "2.55461990418e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.41411782656", "key": "score"}]}]} -{"10.1016/j.bspc.2011.05.010": [{"id": "influence", "unit": [{"value": "1.51285923901e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.254016", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.021": [{"id": "influence", "unit": [{"value": "1.6159596861e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.31560192", "key": "score"}]}]} -{"10.1016/j.bspc.2011.05.011": [{"id": "influence", "unit": [{"value": "1.48980937027e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.14121216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.022": [{"id": "influence", "unit": [{"value": "1.9593105197e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.85463808", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.002": [{"id": "influence", "unit": [{"value": "1.87083337854e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.93569536", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.023": [{"id": "influence", "unit": [{"value": "1.50264494118e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.406656", "key": "score"}]}]} -{"10.1002/bjs.1800800828": [{"id": "influence", "unit": [{"value": "2.05276349756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0877054147111", "key": "score"}]}]} 
-{"10.1016/j.bspc.2011.06.004": [{"id": "influence", "unit": [{"value": "1.53597044234e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.387108561584", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.024": [{"id": "influence", "unit": [{"value": "2.26725070012e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.761426539184", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.025": [{"id": "influence", "unit": [{"value": "1.77349231284e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.320827392", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.007": [{"id": "influence", "unit": [{"value": "1.40574735545e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.026": [{"id": "influence", "unit": [{"value": "1.65600720661e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3802496", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.027": [{"id": "influence", "unit": [{"value": "3.04379181834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.424121856", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.008": [{"id": "influence", "unit": [{"value": "1.39240284872e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.028": [{"id": "influence", "unit": [{"value": "2.16540873578e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.321552896", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.011": [{"id": "influence", "unit": [{"value": "1.41632306281e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.029": [{"id": "influence", "unit": [{"value": "1.7539262291e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0624137673444", "key": "score"}]}]} -{"10.1002/bjs.1800830140": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": 
"score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.013": [{"id": "influence", "unit": [{"value": "1.45752396087e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4352", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.007": [{"id": "influence", "unit": [{"value": "1.78985531064e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.847287296", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.03.022": [{"id": "influence", "unit": [{"value": "1.48785696778e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.46403656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.030": [{"id": "influence", "unit": [{"value": "1.75473038943e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.428627456", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.031": [{"id": "influence", "unit": [{"value": "2.353279823e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7004429312", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.04.001": [{"id": "influence", "unit": [{"value": "1.48470666863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.392", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.008": [{"id": "influence", "unit": [{"value": "2.31587469588e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1074574336", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.032": [{"id": "influence", "unit": [{"value": "1.65229124421e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.331776", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.04.010": [{"id": "influence", "unit": [{"value": "1.44024892224e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.176", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.033": [{"id": "influence", "unit": [{"value": "1.9391730294e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1212379136", "key": "score"}]}]} -{"10.1002/bjs.1800770732": [{"id": 
"influence", "unit": [{"value": "3.20469067904e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.617849262205", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.034": [{"id": "influence", "unit": [{"value": "2.00336332378e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.5124096", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.05.002": [{"id": "influence", "unit": [{"value": "1.40497558269e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.009": [{"id": "influence", "unit": [{"value": "1.53536956023e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.11010816", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.035": [{"id": "influence", "unit": [{"value": "2.35484498246e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.63538354995", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.05.003": [{"id": "influence", "unit": [{"value": "1.42929749612e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8592", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.010": [{"id": "influence", "unit": [{"value": "4.48210438085e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.83282617277", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.036": [{"id": "influence", "unit": [{"value": "2.40889388834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.97399557531", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.037": [{"id": "influence", "unit": [{"value": "3.40892060939e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.24081647616", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.011": [{"id": "influence", "unit": [{"value": "2.86806610088e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.2752125952", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.05.005": [{"id": "influence", "unit": [{"value": "1.42998991144e-08", "key": "score"}]}, {"id": "popularity", 
"unit": [{"value": "0.792", "key": "score"}]}]} -{"10.1002/bjs.1800800829": [{"id": "influence", "unit": [{"value": "1.81725821605e-07", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.38541937564", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.039": [{"id": "influence", "unit": [{"value": "2.19678753647e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1962923008", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.040": [{"id": "influence", "unit": [{"value": "1.96294134291e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.23938263518", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.06.001": [{"id": "influence", "unit": [{"value": "1.41235330998e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.041": [{"id": "influence", "unit": [{"value": "1.66591219918e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4099147776", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.042": [{"id": "influence", "unit": [{"value": "1.46410726228e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4380466176", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.012": [{"id": "influence", "unit": [{"value": "2.49597519002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.3124057088", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.06.002": [{"id": "influence", "unit": [{"value": "1.40153855775e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.043": [{"id": "influence", "unit": [{"value": "1.42616308507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0340402176", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.013": [{"id": "influence", "unit": [{"value": "2.11485065994e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.931594752", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.06.007": [{"id": "influence", 
"unit": [{"value": "1.46452413768e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1002/bjs.1800830142": [{"id": "influence", "unit": [{"value": "3.8525597719e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.550382071798", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.005": [{"id": "influence", "unit": [{"value": "2.3257723807e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65705032158", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.07.004": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.014": [{"id": "influence", "unit": [{"value": "1.5327892699e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3649536", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.006": [{"id": "influence", "unit": [{"value": "1.46835352817e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.07278336", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.07.005": [{"id": "influence", "unit": [{"value": "1.52733033069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.5456", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.007": [{"id": "influence", "unit": [{"value": "1.54120154122e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.862656", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.015": [{"id": "influence", "unit": [{"value": "1.42339402133e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0228427776", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.07.006": [{"id": "influence", "unit": [{"value": "1.3976712191e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.008": [{"id": "influence", "unit": [{"value": "2.26000243195e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.08396432317", "key": 
"score"}]}]} -{"10.1016/j.bulcan.2015.07.010": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.07776", "key": "score"}]}]} -{"10.1002/bjs.1800770733": [{"id": "influence", "unit": [{"value": "1.68884478719e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00502888168651", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.016": [{"id": "influence", "unit": [{"value": "5.90446436104e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "14.5386956015", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.009": [{"id": "influence", "unit": [{"value": "1.46983224999e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.16920576", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.07.012": [{"id": "influence", "unit": [{"value": "1.39860088294e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.010": [{"id": "influence", "unit": [{"value": "1.50642113061e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.42336", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.017": [{"id": "influence", "unit": [{"value": "1.59376657434e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.470071296", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.002": [{"id": "influence", "unit": [{"value": "1.46707085793e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.489636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2011.06.011": [{"id": "influence", "unit": [{"value": "1.65681335047e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.55968912317", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.004": [{"id": "influence", "unit": [{"value": "1.44467832664e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.72", "key": "score"}]}]} -{"10.1016/j.bspc.2011.07.001": [{"id": "influence", "unit": [{"value": "2.89026709917e-08", "key": "score"}]}, 
{"id": "popularity", "unit": [{"value": "4.93443472317", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.018": [{"id": "influence", "unit": [{"value": "2.0375067469e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.94857216", "key": "score"}]}]} -{"10.1002/bjs.1800770734": [{"id": "influence", "unit": [{"value": "1.52306975838e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.68456532868e-07", "key": "score"}]}]} -{"10.1016/j.bspc.2011.07.003": [{"id": "influence", "unit": [{"value": "1.41364327772e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bspc.2011.07.004": [{"id": "influence", "unit": [{"value": "2.21466751434e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.00473416158", "key": "score"}]}]} -{"10.1016/j.bspc.2011.07.007": [{"id": "influence", "unit": [{"value": "4.86853108806e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "30.2058513695", "key": "score"}]}]} -{"10.1016/j.bspc.2011.08.001": [{"id": "influence", "unit": [{"value": "1.40296984755e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2011.08.002": [{"id": "influence", "unit": [{"value": "1.46446008016e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.00478976", "key": "score"}]}]} -{"10.1016/j.bspc.2011.09.001": [{"id": "influence", "unit": [{"value": "1.50277913293e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1456", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.007": [{"id": "influence", "unit": [{"value": "1.39125640401e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2011.09.002": [{"id": "influence", "unit": [{"value": "2.30880583754e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.13575936", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.011": [{"id": "influence", "unit": 
[{"value": "1.4875318134e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.945636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2011.09.003": [{"id": "influence", "unit": [{"value": "1.49277898572e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.300416", "key": "score"}]}]} -{"10.1002/bjs.1800800830": [{"id": "influence", "unit": [{"value": "1.39160184535e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.012": [{"id": "influence", "unit": [{"value": "1.45120593182e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4752", "key": "score"}]}]} -{"10.1016/j.bspc.2011.09.004": [{"id": "influence", "unit": [{"value": "1.75648155571e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.75135816158", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.09.013": [{"id": "influence", "unit": [{"value": "1.42718144062e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.bspc.2011.10.001": [{"id": "influence", "unit": [{"value": "1.56917990837e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.50639616", "key": "score"}]}]} -{"10.1016/j.bspc.2011.10.002": [{"id": "influence", "unit": [{"value": "1.55073039343e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.9216", "key": "score"}]}]} -{"10.1016/j.bspc.2011.11.003": [{"id": "influence", "unit": [{"value": "4.08184163355e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.5074139232", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.045": [{"id": "influence", "unit": [{"value": "1.57148537666e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.26887458816", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.004": [{"id": "influence", "unit": [{"value": "1.40812470078e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} 
-{"10.1016/j.bspc.2011.11.005": [{"id": "influence", "unit": [{"value": "1.4279434706e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4346496", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.006": [{"id": "influence", "unit": [{"value": "1.39407654571e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.1591": [{"id": "influence", "unit": [{"value": "1.43105534658e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.484416", "key": "score"}]}]} -{"10.1002/bjs.1800830145": [{"id": "influence", "unit": [{"value": "1.40455863858e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.01679616", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.046": [{"id": "influence", "unit": [{"value": "1.82938384104e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7020032", "key": "score"}]}]} -{"10.1016/j.bspc.2011.11.006": [{"id": "influence", "unit": [{"value": "1.45513920888e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.622656", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.013": [{"id": "influence", "unit": [{"value": "1.39559783055e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.047": [{"id": "influence", "unit": [{"value": "2.7682406054e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.77286771986", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.048": [{"id": "influence", "unit": [{"value": "1.68108910902e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.293796561584", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.016": [{"id": "influence", "unit": [{"value": "1.41132235292e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.bspc.2011.11.007": [{"id": "influence", "unit": [{"value": "1.45417934601e-08", "key": "score"}]}, {"id": "popularity", 
"unit": [{"value": "2.046656", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.049": [{"id": "influence", "unit": [{"value": "1.66245115877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.87941210112", "key": "score"}]}]} -{"10.1016/j.bspc.2011.12.001": [{"id": "influence", "unit": [{"value": "1.44947873874e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0559872", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.10.018": [{"id": "influence", "unit": [{"value": "1.39642212834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1002/bjs.1800830146": [{"id": "influence", "unit": [{"value": "2.13786944508e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.214116867573", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.01.051": [{"id": "influence", "unit": [{"value": "2.27454576941e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7399003136", "key": "score"}]}]} -{"10.1016/j.bspc.2012.01.001": [{"id": "influence", "unit": [{"value": "1.42867097637e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2439936", "key": "score"}]}]} -{"10.1016/j.bspc.2012.01.002": [{"id": "influence", "unit": [{"value": "2.7419514496e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.99973192158", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.002": [{"id": "influence", "unit": [{"value": "1.43685773631e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.17603656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.001": [{"id": "influence", "unit": [{"value": "2.15244546006e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.12111921152", "key": "score"}]}]} -{"10.1016/j.bspc.2012.02.001": [{"id": "influence", "unit": [{"value": "1.47022156874e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.176256", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.003": [{"id": "influence", "unit": 
[{"value": "1.45558088501e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.002": [{"id": "influence", "unit": [{"value": "1.47304155508e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.53865216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.02.002": [{"id": "influence", "unit": [{"value": "3.41579032033e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.98626376158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.003": [{"id": "influence", "unit": [{"value": "2.15231173511e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.741149696", "key": "score"}]}]} -{"10.1002/bjs.1800770735": [{"id": "influence", "unit": [{"value": "1.92149504752e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00363791484297", "key": "score"}]}]} -{"10.1016/j.bspc.2012.02.003": [{"id": "influence", "unit": [{"value": "1.52898970342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.08024832", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.004": [{"id": "influence", "unit": [{"value": "1.77871461002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.072256", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.004": [{"id": "influence", "unit": [{"value": "1.46366832841e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.03.002": [{"id": "influence", "unit": [{"value": "1.62278902873e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.830016", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.005": [{"id": "influence", "unit": [{"value": "2.09778019577e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.49759266816", "key": "score"}]}]} -{"10.1016/j.bspc.2012.03.003": [{"id": "influence", "unit": [{"value": "1.39933875286e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": 
"score"}]}]} -{"10.1016/j.bulcan.2015.11.005": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.019": [{"id": "influence", "unit": [{"value": "4.93120641303e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "14.6419024992", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.006": [{"id": "influence", "unit": [{"value": "1.78927152939e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.5856256", "key": "score"}]}]} -{"10.1016/j.bspc.2012.03.004": [{"id": "influence", "unit": [{"value": "1.47189687444e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": "score"}]}]} -{"10.1002/bjs.1800800831": [{"id": "influence", "unit": [{"value": "2.9648572286e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000674059986411", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.006": [{"id": "influence", "unit": [{"value": "1.41067712469e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.007": [{"id": "influence", "unit": [{"value": "2.08658259817e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.74721816576", "key": "score"}]}]} -{"10.1016/j.bspc.2012.03.005": [{"id": "influence", "unit": [{"value": "1.75916854294e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08054856158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.008": [{"id": "influence", "unit": [{"value": "1.43916267878e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1524096", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.020": [{"id": "influence", "unit": [{"value": "3.29252368896e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.52968049118", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.002": [{"id": "influence", "unit": [{"value": "1.68970062986e-08", "key": 
"score"}]}, {"id": "popularity", "unit": [{"value": "0.418176", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.009": [{"id": "influence", "unit": [{"value": "2.51299479608e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.586545152", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.008": [{"id": "influence", "unit": [{"value": "1.43655436141e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.003": [{"id": "influence", "unit": [{"value": "1.56910165297e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3616", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.010": [{"id": "influence", "unit": [{"value": "2.50510350062e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.28562645402", "key": "score"}]}]} -{"10.1002/bjs.1800830149": [{"id": "influence", "unit": [{"value": "1.44996628301e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.31621703842e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.022": [{"id": "influence", "unit": [{"value": "3.08555072694e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.24665806235", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.011": [{"id": "influence", "unit": [{"value": "4.71181245507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.90610352813", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.004": [{"id": "influence", "unit": [{"value": "1.87118458967e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.484416", "key": "score"}]}]} -{"10.1016/j.bulcan.2015.11.009": [{"id": "influence", "unit": [{"value": "1.40034306297e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.012": [{"id": "influence", "unit": [{"value": "1.6894598116e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32608317918", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.005": 
[{"id": "influence", "unit": [{"value": "1.93019250357e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3231232", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.006": [{"id": "influence", "unit": [{"value": "1.40590676851e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.046656", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.014": [{"id": "influence", "unit": [{"value": "3.18395908128e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.55022424952", "key": "score"}]}]} -{"10.1016/j.bspc.2012.04.007": [{"id": "influence", "unit": [{"value": "1.92454730741e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.0294656", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.016": [{"id": "influence", "unit": [{"value": "3.58531933966e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "11.7068543072", "key": "score"}]}]} -{"10.1002/bjs.1800830152": [{"id": "influence", "unit": [{"value": "1.39487844463e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0060466176", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.01.007": [{"id": "influence", "unit": [{"value": "1.41393736665e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.001": [{"id": "influence", "unit": [{"value": "1.48065110517e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.37152", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.02.017": [{"id": "influence", "unit": [{"value": "2.63861213244e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.13082327518", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.002": [{"id": "influence", "unit": [{"value": "2.03880211876e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.94457232317", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.001": [{"id": "influence", "unit": [{"value": "2.09551529965e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": 
"0.30398976", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.003": [{"id": "influence", "unit": [{"value": "1.45322684878e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1575936", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.01.008": [{"id": "influence", "unit": [{"value": "1.39660093165e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.004": [{"id": "influence", "unit": [{"value": "2.04168254788e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.34762496", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.002": [{"id": "influence", "unit": [{"value": "1.40331980195e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.01.014": [{"id": "influence", "unit": [{"value": "1.423113652e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432", "key": "score"}]}]} -{"10.1002/bjs.1800770736": [{"id": "influence", "unit": [{"value": "1.66561348149e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.006719779", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.005": [{"id": "influence", "unit": [{"value": "1.88489488647e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.332288", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.003": [{"id": "influence", "unit": [{"value": "1.54367231064e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08445256158", "key": "score"}]}]} -{"10.1016/j.bspc.2012.05.007": [{"id": "influence", "unit": [{"value": "1.60626139405e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7726464", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.004": [{"id": "influence", "unit": [{"value": "1.91627023547e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.8644352", "key": "score"}]}]} -{"10.1016/j.bspc.2012.06.001": [{"id": "influence", "unit": [{"value": "1.94373623456e-08", "key": "score"}]}, 
{"id": "popularity", "unit": [{"value": "2.7065472", "key": "score"}]}]} -{"10.1016/j.bspc.2012.06.002": [{"id": "influence", "unit": [{"value": "1.47337150931e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.63936", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.005": [{"id": "influence", "unit": [{"value": "2.09473054887e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3023142912", "key": "score"}]}]} -{"10.1016/j.bspc.2012.06.003": [{"id": "influence", "unit": [{"value": "1.66051940624e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.56172616158", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.02.002": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.006": [{"id": "influence", "unit": [{"value": "4.48020877024e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.26474456954", "key": "score"}]}]} -{"10.1002/bjs.1800830155": [{"id": "influence", "unit": [{"value": "2.43949008345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.89965759807", "key": "score"}]}]} -{"10.1016/j.bspc.2012.06.004": [{"id": "influence", "unit": [{"value": "1.67071735997e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.3098496", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.002": [{"id": "influence", "unit": [{"value": "1.51021836218e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0879872", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.007": [{"id": "influence", "unit": [{"value": "2.91136190037e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.68252848128", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.003": [{"id": "influence", "unit": [{"value": "2.35071170617e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.40720456158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.008": [{"id": 
"influence", "unit": [{"value": "1.59654929626e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.849636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.004": [{"id": "influence", "unit": [{"value": "2.47183104353e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.93606032317", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.02.014": [{"id": "influence", "unit": [{"value": "1.43621870424e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3056", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.009": [{"id": "influence", "unit": [{"value": "2.51885939553e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.8166272", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.005": [{"id": "influence", "unit": [{"value": "2.60852588989e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.87046216158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.010": [{"id": "influence", "unit": [{"value": "2.09644082764e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.89427596152", "key": "score"}]}]} -{"10.1002/bjs.1800800833": [{"id": "influence", "unit": [{"value": "2.55234515315e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.214631171353", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.006": [{"id": "influence", "unit": [{"value": "1.41592740525e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.72", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.007": [{"id": "influence", "unit": [{"value": "1.60931157026e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.645156561584", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.011": [{"id": "influence", "unit": [{"value": "1.9171976596e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1630923776", "key": "score"}]}]} -{"10.1016/j.bspc.2012.08.008": [{"id": "influence", "unit": [{"value": "1.77151253655e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": 
"4.18646856158", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.02.016": [{"id": "influence", "unit": [{"value": "1.41123181198e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bspc.2012.09.002": [{"id": "influence", "unit": [{"value": "1.81080733373e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.804416", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.012": [{"id": "influence", "unit": [{"value": "1.78400697147e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.339024896", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.014": [{"id": "influence", "unit": [{"value": "1.86722652606e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.861257216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.09.003": [{"id": "influence", "unit": [{"value": "1.85721878597e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.54819656158", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.001": [{"id": "influence", "unit": [{"value": "1.56093891548e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.392256", "key": "score"}]}]} -{"10.1002/bjs.1800830202": [{"id": "influence", "unit": [{"value": "1.858607198e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0787562838158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.03.015": [{"id": "influence", "unit": [{"value": "1.44367537251e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.124416", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.06.001": [{"id": "influence", "unit": [{"value": "1.4408438485e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.046656", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.002": [{"id": "influence", "unit": [{"value": "1.42914282325e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3879936", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.06.002": [{"id": "influence", "unit": [{"value": 
"1.79318290607e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.31881612152", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.003": [{"id": "influence", "unit": [{"value": "1.43405918808e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.07.002": [{"id": "influence", "unit": [{"value": "2.63480635463e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.20876449042", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.004": [{"id": "influence", "unit": [{"value": "1.51600991431e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03776", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.001": [{"id": "influence", "unit": [{"value": "3.41561621069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.48469276469", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.005": [{"id": "influence", "unit": [{"value": "2.0954843164e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.350272", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.002": [{"id": "influence", "unit": [{"value": "2.03007024589e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.20105998814", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.1638": [{"id": "influence", "unit": [{"value": "1.97048204379e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.02954056158", "key": "score"}]}]} -{"10.1002/bjs.1800770737": [{"id": "influence", "unit": [{"value": "1.43454963257e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.14094221446e-07", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.006": [{"id": "influence", "unit": [{"value": "1.58802929789e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.892224", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.003": [{"id": "influence", "unit": [{"value": "1.96098591035e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.00031232", "key": 
"score"}]}]} -{"10.1016/j.buildenv.2007.08.004": [{"id": "influence", "unit": [{"value": "1.94968738424e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.34653229056", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.03.007": [{"id": "influence", "unit": [{"value": "1.44873112468e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.10.007": [{"id": "influence", "unit": [{"value": "1.41075381717e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.56736", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.005": [{"id": "influence", "unit": [{"value": "1.58344035352e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.066811392", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.003": [{"id": "influence", "unit": [{"value": "1.98011585561e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.9927936", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.023": [{"id": "influence", "unit": [{"value": "1.45383931482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.010077696", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.03.012": [{"id": "influence", "unit": [{"value": "1.4670945341e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.004": [{"id": "influence", "unit": [{"value": "1.40719075877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1002/bjs.1800830203": [{"id": "influence", "unit": [{"value": "2.0770745324e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000183555403284", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.08.006": [{"id": "influence", "unit": [{"value": "1.41271524791e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.010077696", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.005": [{"id": "influence", "unit": [{"value": "1.69216745379e-08", "key": "score"}]}, 
{"id": "popularity", "unit": [{"value": "1.222656", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.024": [{"id": "influence", "unit": [{"value": "3.08536854974e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.67624991677", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.006": [{"id": "influence", "unit": [{"value": "1.48567657775e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.778176", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.09.001": [{"id": "influence", "unit": [{"value": "1.57686722593e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.9684096", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.007": [{"id": "influence", "unit": [{"value": "1.44264027248e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.03.013": [{"id": "influence", "unit": [{"value": "1.39260374604e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.08.025": [{"id": "influence", "unit": [{"value": "2.37603130176e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.37308795358", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.008": [{"id": "influence", "unit": [{"value": "2.06935444063e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.8096256", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.09.002": [{"id": "influence", "unit": [{"value": "2.29069082446e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.76957126656", "key": "score"}]}]} -{"10.1002/bjs.1800770738": [{"id": "influence", "unit": [{"value": "1.7500071181e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000320992486649", "key": "score"}]}]} -{"10.1016/j.bspc.2012.11.009": [{"id": "influence", "unit": [{"value": "1.50593182995e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0777965615844", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.09.003": 
[{"id": "influence", "unit": [{"value": "2.25729707375e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.06345216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.001": [{"id": "influence", "unit": [{"value": "1.47509940962e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.992256", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.001": [{"id": "influence", "unit": [{"value": "2.41308058053e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.4571136", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.001": [{"id": "influence", "unit": [{"value": "2.10088642476e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.29884027358", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.003": [{"id": "influence", "unit": [{"value": "1.45828465076e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.262692561584", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.002": [{"id": "influence", "unit": [{"value": "3.90076920919e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.00985654067", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.04.003": [{"id": "influence", "unit": [{"value": "1.45606959808e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.792", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.004": [{"id": "influence", "unit": [{"value": "1.43571558404e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.67776", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.005": [{"id": "influence", "unit": [{"value": "1.50594954341e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1002/bjs.1800800837": [{"id": "influence", "unit": [{"value": "6.24597092541e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.733724383441", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.003": [{"id": "influence", "unit": [{"value": "2.10065608511e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": 
"1.23003830069", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.002": [{"id": "influence", "unit": [{"value": "1.6486496091e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.177417216", "key": "score"}]}]} -{"10.1016/j.bspc.2012.12.006": [{"id": "influence", "unit": [{"value": "1.45961810886e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.51552", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.004": [{"id": "influence", "unit": [{"value": "1.57516928853e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.656733696", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.001": [{"id": "influence", "unit": [{"value": "2.24775304269e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.82166856158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.005": [{"id": "influence", "unit": [{"value": "3.44497115805e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.3739594752", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.003": [{"id": "influence", "unit": [{"value": "1.70979424538e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.284032", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.04.005": [{"id": "influence", "unit": [{"value": "1.42169219577e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.72", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.003": [{"id": "influence", "unit": [{"value": "2.61737931603e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.36019960152", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.006": [{"id": "influence", "unit": [{"value": "1.81186047743e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.87449928158", "key": "score"}]}]} -{"10.1002/bjs.1800830204": [{"id": "influence", "unit": [{"value": "2.60412862308e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.481741015127", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.004": [{"id": "influence", "unit": [{"value": 
"1.47720057206e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.300416", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.005": [{"id": "influence", "unit": [{"value": "4.26903097374e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "21.8027611232", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.009": [{"id": "influence", "unit": [{"value": "2.23307511579e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.990199296", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.007": [{"id": "influence", "unit": [{"value": "1.73383624405e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.96765256158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.010": [{"id": "influence", "unit": [{"value": "1.96084984716e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.47321133056", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.004": [{"id": "influence", "unit": [{"value": "2.10585129583e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.332727296", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.05.004": [{"id": "influence", "unit": [{"value": "1.39926815551e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.008": [{"id": "influence", "unit": [{"value": "1.42307022879e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.012": [{"id": "influence", "unit": [{"value": "2.16767541345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.09906493918", "key": "score"}]}]} -{"10.1016/j.bspc.2013.01.009": [{"id": "influence", "unit": [{"value": "1.42648139061e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.10976", "key": "score"}]}]} -{"10.1002/bjs.1800770742": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} 
-{"10.1016/j.buildenv.2007.10.013": [{"id": "influence", "unit": [{"value": "1.5868275828e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.940416", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.001": [{"id": "influence", "unit": [{"value": "1.51359710756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.097856", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.014": [{"id": "influence", "unit": [{"value": "1.75997908697e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.65738797056", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.005": [{"id": "influence", "unit": [{"value": "1.73561874023e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.38471936", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.002": [{"id": "influence", "unit": [{"value": "1.46010837206e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.56736", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.015": [{"id": "influence", "unit": [{"value": "3.73714413721e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.2046204505", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.05.008": [{"id": "influence", "unit": [{"value": "1.43489715264e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.003": [{"id": "influence", "unit": [{"value": "2.20181050117e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.129782807922", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.016": [{"id": "influence", "unit": [{"value": "1.5096982362e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.01679616", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.004": [{"id": "influence", "unit": [{"value": "1.48751300871e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.562176", "key": "score"}]}]} -{"10.1002/bjs.1800830205": [{"id": "influence", "unit": [{"value": "2.07546745762e-08", "key": "score"}]}, {"id": 
"popularity", "unit": [{"value": "0.389780541338", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.06.004": [{"id": "influence", "unit": [{"value": "1.42154378584e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.006": [{"id": "influence", "unit": [{"value": "1.92612889953e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.736576", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.017": [{"id": "influence", "unit": [{"value": "5.87547960299e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.8464802707", "key": "score"}]}]} -{"10.1016/j.bspc.2013.02.007": [{"id": "influence", "unit": [{"value": "1.69981358024e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08381256158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.018": [{"id": "influence", "unit": [{"value": "3.93068999929e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.06270442429", "key": "score"}]}]} -{"10.1016/j.bspc.2013.03.001": [{"id": "influence", "unit": [{"value": "1.59080169945e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.838656", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.019": [{"id": "influence", "unit": [{"value": "2.93890424354e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.794432721584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.001": [{"id": "influence", "unit": [{"value": "1.41632660599e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.15552", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.07.002": [{"id": "influence", "unit": [{"value": "1.7538379036e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.020": [{"id": "influence", "unit": [{"value": "2.05213687439e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.6966651747", "key": "score"}]}]} -{"10.1002/bjs.1800770743": 
[{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.002": [{"id": "influence", "unit": [{"value": "1.46668954781e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8592", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.003": [{"id": "influence", "unit": [{"value": "2.04158706672e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.28099656158", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.08.002": [{"id": "influence", "unit": [{"value": "1.41989530088e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.021": [{"id": "influence", "unit": [{"value": "1.43581558559e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.020155392", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.004": [{"id": "influence", "unit": [{"value": "1.59322968531e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.5072", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.022": [{"id": "influence", "unit": [{"value": "2.53870392939e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.380812288", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.005": [{"id": "influence", "unit": [{"value": "1.63957594784e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.446016", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.023": [{"id": "influence", "unit": [{"value": "2.04162205464e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.14829122493", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.09.007": [{"id": "influence", "unit": [{"value": "1.47964959347e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.006": [{"id": "influence", "unit": [{"value": "1.45680104502e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": 
"score"}]}]} -{"10.1002/bjs.1800830206": [{"id": "influence", "unit": [{"value": "4.7755259417e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.75292801941", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.10.024": [{"id": "influence", "unit": [{"value": "1.54217630592e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.132627456", "key": "score"}]}]} -{"10.1016/j.bspc.2013.04.007": [{"id": "influence", "unit": [{"value": "1.40656153948e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.001": [{"id": "influence", "unit": [{"value": "1.58954288535e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.226400182272", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.001": [{"id": "influence", "unit": [{"value": "1.79480243584e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08135312317", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.002": [{"id": "influence", "unit": [{"value": "1.90218857374e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.13314502656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.002": [{"id": "influence", "unit": [{"value": "1.51741394277e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.622656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.003": [{"id": "influence", "unit": [{"value": "1.5043342575e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.56736", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.003": [{"id": "influence", "unit": [{"value": "1.7649818355e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.947037696", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.004": [{"id": "influence", "unit": [{"value": "1.54003126301e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3284096", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.09.017": [{"id": "influence", "unit": [{"value": "1.39858392433e-08", "key": 
"score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1002/bjs.1800770802": [{"id": "influence", "unit": [{"value": "1.45011339633e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00366490060093", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.005": [{"id": "influence", "unit": [{"value": "1.45092474662e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8439936", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.004": [{"id": "influence", "unit": [{"value": "1.82109019622e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.692416", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.006": [{"id": "influence", "unit": [{"value": "1.6345833814e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.02976", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.007": [{"id": "influence", "unit": [{"value": "1.54062730593e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.536", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.09.019": [{"id": "influence", "unit": [{"value": "1.42045726031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.008": [{"id": "influence", "unit": [{"value": "1.41288622828e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7296", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.009": [{"id": "influence", "unit": [{"value": "1.73146483971e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.320192", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.010": [{"id": "influence", "unit": [{"value": "2.67760514906e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.183296", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.09.020": [{"id": "influence", "unit": [{"value": "1.4535188035e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": "score"}]}]} -{"10.1016/j.bspc.2013.05.011": [{"id": "influence", "unit": [{"value": 
"1.41922409304e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.164": [{"id": "influence", "unit": [{"value": "1.7956661054e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.4537344", "key": "score"}]}]} -{"10.1002/bjs.1800830207": [{"id": "influence", "unit": [{"value": "4.32253172896e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.76958348313", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.001": [{"id": "influence", "unit": [{"value": "1.76383175573e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.96861256158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.002": [{"id": "influence", "unit": [{"value": "1.41096202364e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.003": [{"id": "influence", "unit": [{"value": "1.41108227319e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43776", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.10.014": [{"id": "influence", "unit": [{"value": "1.45665273718e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.792", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.004": [{"id": "influence", "unit": [{"value": "2.38261952892e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.7201536", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.005": [{"id": "influence", "unit": [{"value": "2.21563430941e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.779712", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.006": [{"id": "influence", "unit": [{"value": "1.73040878704e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.933156561584", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.10.016": [{"id": "influence", "unit": [{"value": "1.43358943959e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.006": 
[{"id": "influence", "unit": [{"value": "5.37226124464e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "11.7435994617", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.007": [{"id": "influence", "unit": [{"value": "1.48555122279e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.27776", "key": "score"}]}]} -{"10.1002/bjs.1800770803": [{"id": "influence", "unit": [{"value": "1.91665335176e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0807617024201", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.006": [{"id": "influence", "unit": [{"value": "2.03315475884e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.07008", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.008": [{"id": "influence", "unit": [{"value": "1.59922182052e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.78976", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.007": [{"id": "influence", "unit": [{"value": "5.54734257544e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.97335423933", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.10.017": [{"id": "influence", "unit": [{"value": "1.40149434529e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.009": [{"id": "influence", "unit": [{"value": "1.48209655985e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.622656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.010": [{"id": "influence", "unit": [{"value": "1.42292725345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.007": [{"id": "influence", "unit": [{"value": "1.57140606031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.50121216", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.008": [{"id": "influence", "unit": [{"value": "2.06004951422e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": 
"1.33518811546", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.011": [{"id": "influence", "unit": [{"value": "2.05131160693e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.35766856158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.009": [{"id": "influence", "unit": [{"value": "2.14117908561e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.03087548416", "key": "score"}]}]} -{"10.1002/bjs.1800830208": [{"id": "influence", "unit": [{"value": "3.20505216178e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.60325951815", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.11.006": [{"id": "influence", "unit": [{"value": "1.64070418842e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.552", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.009": [{"id": "influence", "unit": [{"value": "1.60503395919e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.585939456", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.012": [{"id": "influence", "unit": [{"value": "1.46380890355e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.124416", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.010": [{"id": "influence", "unit": [{"value": "1.78447836614e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.434909696", "key": "score"}]}]} -{"10.1016/j.bspc.2013.06.014": [{"id": "influence", "unit": [{"value": "2.10439938784e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.41984", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.002": [{"id": "influence", "unit": [{"value": "1.47791433203e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.171072", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.011": [{"id": "influence", "unit": [{"value": "1.51450599992e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.021149724672", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.010": [{"id": "influence", "unit": [{"value": 
"1.61980833189e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.450077696", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.11.007": [{"id": "influence", "unit": [{"value": "2.20029297808e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.776", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.012": [{"id": "influence", "unit": [{"value": "1.4807537002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1213056", "key": "score"}]}]} -{"10.1002/bjs.1800770804": [{"id": "influence", "unit": [{"value": "2.69852546285e-07", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.6426677993", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.003": [{"id": "influence", "unit": [{"value": "1.47877912317e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0752", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.013": [{"id": "influence", "unit": [{"value": "1.74377162081e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.902528387089", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.004": [{"id": "influence", "unit": [{"value": "1.5601937833e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.52736", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.011": [{"id": "influence", "unit": [{"value": "1.82275547777e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.992983761584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.005": [{"id": "influence", "unit": [{"value": "1.71330840977e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.03741256158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.014": [{"id": "influence", "unit": [{"value": "2.05927561501e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.0968395776", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.11.021": [{"id": "influence", "unit": [{"value": "1.43846583976e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576036561584", "key": "score"}]}]} 
-{"10.1016/j.bspc.2013.07.006": [{"id": "influence", "unit": [{"value": "1.86722755857e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.948608", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.015": [{"id": "influence", "unit": [{"value": "2.0221560342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.79577928158", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.12.002": [{"id": "influence", "unit": [{"value": "1.50003547797e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.600036561584", "key": "score"}]}]} -{"10.1002/bjs.1800830209": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.012": [{"id": "influence", "unit": [{"value": "1.51949775879e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.65279232", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.009": [{"id": "influence", "unit": [{"value": "1.47460748411e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.489636561584", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.016": [{"id": "influence", "unit": [{"value": "1.76021627338e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.204256301744", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.010": [{"id": "influence", "unit": [{"value": "1.69820215509e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.44896", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.11.017": [{"id": "influence", "unit": [{"value": "1.69487650784e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.459116752896", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.011": [{"id": "influence", "unit": [{"value": "1.98600655464e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.71143312317", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.013": [{"id": "influence", "unit": [{"value": "1.62681987218e-08", "key": "score"}]}, 
{"id": "popularity", "unit": [{"value": "0.686011392", "key": "score"}]}]} -{"10.1016/j.bspc.2013.07.012": [{"id": "influence", "unit": [{"value": "1.42747769246e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.29376", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.001": [{"id": "influence", "unit": [{"value": "1.88543041649e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.310252032", "key": "score"}]}]} -{"10.1016/j.bulcan.2016.12.005": [{"id": "influence", "unit": [{"value": "1.4000325672e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1002/bjs.1800770805": [{"id": "influence", "unit": [{"value": "3.95911883883e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.130740273398", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.003": [{"id": "influence", "unit": [{"value": "2.82814827126e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.99266043291", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.014": [{"id": "influence", "unit": [{"value": "4.80336134235e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.5715934399", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.001": [{"id": "influence", "unit": [{"value": "1.49317096206e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.004": [{"id": "influence", "unit": [{"value": "1.70126097738e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.832160875184", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.01.008": [{"id": "influence", "unit": [{"value": "1.3891346097e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.003": [{"id": "influence", "unit": [{"value": "1.48610121089e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03779656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.006": [{"id": 
"influence", "unit": [{"value": "2.25782736868e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.61282376158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.004": [{"id": "influence", "unit": [{"value": "1.47377372924e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4752", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.007": [{"id": "influence", "unit": [{"value": "2.09474263951e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.96689593277", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.015": [{"id": "influence", "unit": [{"value": "1.61635696414e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.782393856", "key": "score"}]}]} -{"10.1002/bjs.1800830210": [{"id": "influence", "unit": [{"value": "2.11466703865e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.51222298843", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.01.010": [{"id": "influence", "unit": [{"value": "1.44925719265e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.008": [{"id": "influence", "unit": [{"value": "2.71276906539e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.33538263518", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.006": [{"id": "influence", "unit": [{"value": "2.54979930121e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.358528", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.09.017": [{"id": "influence", "unit": [{"value": "1.48833157595e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.093312", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.007": [{"id": "influence", "unit": [{"value": "1.49683898057e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.171072", "key": "score"}]}]} -{"10.1016/j.buildenv.2007.12.009": [{"id": "influence", "unit": [{"value": "1.61364350622e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": 
"0.212497617584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.008": [{"id": "influence", "unit": [{"value": "1.79526562745e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.265216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.001": [{"id": "influence", "unit": [{"value": "1.80129550723e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.5878656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.009": [{"id": "influence", "unit": [{"value": "1.60124235618e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.65312", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.02.003": [{"id": "influence", "unit": [{"value": "1.56392297095e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.49603656158", "key": "score"}]}]} -{"10.1002/bjs.1800770806": [{"id": "influence", "unit": [{"value": "4.19961167825e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.336593663636", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.010": [{"id": "influence", "unit": [{"value": "1.54227647079e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.387108561584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.011": [{"id": "influence", "unit": [{"value": "1.42174301871e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.406656", "key": "score"}]}]} -{"10.1016/j.bspc.2013.08.012": [{"id": "influence", "unit": [{"value": "1.71620737386e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.73703312317", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.001": [{"id": "influence", "unit": [{"value": "1.97476892991e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.08288", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.03.014": [{"id": "influence", "unit": [{"value": "1.40737074166e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.002": [{"id": "influence", "unit": [{"value": 
"2.75902099141e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.34757192158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.002": [{"id": "influence", "unit": [{"value": "1.4221541576e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0777965615844", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.006": [{"id": "influence", "unit": [{"value": "1.53714684338e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.691849123169", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.003": [{"id": "influence", "unit": [{"value": "2.74075214023e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.10542459358", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.007": [{"id": "influence", "unit": [{"value": "1.40484945831e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1002/bjs.1800830212": [{"id": "influence", "unit": [{"value": "2.66969531353e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.775297283766", "key": "score"}]}]} -{"10.1016/j.bspc.2013.09.008": [{"id": "influence", "unit": [{"value": "1.53719196499e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.59229256158", "key": "score"}]}]} -{"10.1016/j.bulcan.2017.06.014": [{"id": "influence", "unit": [{"value": "2.0769133069e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.001": [{"id": "influence", "unit": [{"value": "1.57961160131e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.54045256158", "key": "score"}]}]} -{"10.1016/j.bulm.2003.06.001": [{"id": "influence", "unit": [{"value": "1.80732611524e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.29635246542", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.003": [{"id": "influence", "unit": [{"value": "1.47190380983e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} 
-{"10.1016/j.bspc.2013.10.004": [{"id": "influence", "unit": [{"value": "1.430203191e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.005": [{"id": "influence", "unit": [{"value": "1.60126571041e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.74336", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.001": [{"id": "influence", "unit": [{"value": "1.49168734938e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.006": [{"id": "influence", "unit": [{"value": "1.50298327317e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.92003656158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.007": [{"id": "influence", "unit": [{"value": "2.62308960492e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.820352", "key": "score"}]}]} -{"10.1002/bjs.1800770808": [{"id": "influence", "unit": [{"value": "3.62464274217e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.250905373926", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.008": [{"id": "influence", "unit": [{"value": "1.47031938238e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.003": [{"id": "influence", "unit": [{"value": "1.53172771927e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.05670576033", "key": "score"}]}]} -{"10.1016/j.bspc.2013.10.009": [{"id": "influence", "unit": [{"value": "1.58684001141e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.176", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.001": [{"id": "influence", "unit": [{"value": "3.39234937342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.59072", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.002": [{"id": "influence", "unit": [{"value": "1.71916767108e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": 
"2.58336", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.004": [{"id": "influence", "unit": [{"value": "2.14182522117e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.712082843704", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.003": [{"id": "influence", "unit": [{"value": "1.5445932303e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.792036561584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.005": [{"id": "influence", "unit": [{"value": "1.44920401063e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.29376", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.006": [{"id": "influence", "unit": [{"value": "2.09740130553e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.01219656158", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.008": [{"id": "influence", "unit": [{"value": "1.55587287007e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.78336", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.170": [{"id": "influence", "unit": [{"value": "1.92783349862e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.30695824317", "key": "score"}]}]} -{"10.1002/bjs.1800830213": [{"id": "influence", "unit": [{"value": "1.9228154406e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0186312116195", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.009": [{"id": "influence", "unit": [{"value": "1.47562713863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.484452561584", "key": "score"}]}]} -{"10.1016/j.bspc.2013.11.010": [{"id": "influence", "unit": [{"value": "1.69559094393e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.49536", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.005": [{"id": "influence", "unit": [{"value": "1.70752904263e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.495345007", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.001": [{"id": "influence", "unit": [{"value": "1.45869463639e-08", 
"key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.86976", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.006": [{"id": "influence", "unit": [{"value": "1.90665348612e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.11658592037", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.003": [{"id": "influence", "unit": [{"value": "2.36070212814e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.41216", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.005": [{"id": "influence", "unit": [{"value": "1.52177833636e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.46656", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.004": [{"id": "influence", "unit": [{"value": "1.62691418254e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.6938843136", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.007": [{"id": "influence", "unit": [{"value": "1.60055584054e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.881929123169", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.007": [{"id": "influence", "unit": [{"value": "3.0909151793e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.21294559475", "key": "score"}]}]} -{"10.1002/bjs.1800770809": [{"id": "influence", "unit": [{"value": "2.17559933322e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0565278437647", "key": "score"}]}]} -{"10.1016/j.bspc.2013.12.009": [{"id": "influence", "unit": [{"value": "2.01097818689e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.4624", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.001": [{"id": "influence", "unit": [{"value": "1.4652504107e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.50976", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.004": [{"id": "influence", "unit": [{"value": "1.53686766653e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.574016", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.005": [{"id": 
"influence", "unit": [{"value": "1.72482229454e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.784249856", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.005": [{"id": "influence", "unit": [{"value": "1.43713770834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.008": [{"id": "influence", "unit": [{"value": "1.46156247007e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.399847550386", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.007": [{"id": "influence", "unit": [{"value": "1.59720123874e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.792256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.008": [{"id": "influence", "unit": [{"value": "1.84986898731e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.70339656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.009": [{"id": "influence", "unit": [{"value": "2.33910266535e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.847232", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.006": [{"id": "influence", "unit": [{"value": "1.51993130901e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.152446161584", "key": "score"}]}]} -{"10.1002/bjs.1800830214": [{"id": "influence", "unit": [{"value": "2.10522763705e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03109834889", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.010": [{"id": "influence", "unit": [{"value": "1.79772261204e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.613376", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.009": [{"id": "influence", "unit": [{"value": "2.60879631462e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.69399400721", "key": "score"}]}]} -{"10.1016/j.bspc.2014.01.011": [{"id": "influence", "unit": [{"value": "1.66223463553e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.85376", "key": 
"score"}]}]} -{"10.1016/j.bspc.2014.01.012": [{"id": "influence", "unit": [{"value": "1.4595335457e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.37152", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.007": [{"id": "influence", "unit": [{"value": "1.76240621174e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.0560256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.001": [{"id": "influence", "unit": [{"value": "1.85982964893e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.057472", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.002": [{"id": "influence", "unit": [{"value": "1.66924539165e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.0496", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.010": [{"id": "influence", "unit": [{"value": "6.75628008372e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.31071340444", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.005": [{"id": "influence", "unit": [{"value": "1.66228182194e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.2752", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.011": [{"id": "influence", "unit": [{"value": "1.76003945695e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.112066832532", "key": "score"}]}]} -{"10.1002/bjs.1800770810": [{"id": "influence", "unit": [{"value": "2.89020671133e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00151463222663", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.008": [{"id": "influence", "unit": [{"value": "1.43008843857e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0777965615844", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.009": [{"id": "influence", "unit": [{"value": "1.92105415314e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.53059656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.010": [{"id": "influence", "unit": [{"value": "1.76155880783e-08", "key": "score"}]}, {"id": 
"popularity", "unit": [{"value": "1.63399312317", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.012": [{"id": "influence", "unit": [{"value": "1.68809250653e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.942568402374", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.011": [{"id": "influence", "unit": [{"value": "1.63678945608e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.47296", "key": "score"}]}]} -{"10.1016/j.bspc.2014.02.012": [{"id": "influence", "unit": [{"value": "1.56715319777e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.928256", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.013": [{"id": "influence", "unit": [{"value": "1.6610562052e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.370599478788", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.014": [{"id": "influence", "unit": [{"value": "1.58564858536e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.361306069402", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.015": [{"id": "influence", "unit": [{"value": "1.67918091154e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.293599066522", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.001": [{"id": "influence", "unit": [{"value": "1.6178572876e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.95936", "key": "score"}]}]} -{"10.1002/bjs.1800830215": [{"id": "influence", "unit": [{"value": "3.95872687348e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.119697056722", "key": "score"}]}]} -{"10.1016/j.bulm.2003.08.016": [{"id": "influence", "unit": [{"value": "2.07426602679e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.71532561397", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.004": [{"id": "influence", "unit": [{"value": "3.0367948727e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.6020388079", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.005": [{"id": "influence", 
"unit": [{"value": "1.64722280811e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.568", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.001": [{"id": "influence", "unit": [{"value": "1.67859186006e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.120013982761", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.006": [{"id": "influence", "unit": [{"value": "1.6067774989e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.43555656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.007": [{"id": "influence", "unit": [{"value": "2.53637875633e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "11.0832", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.008": [{"id": "influence", "unit": [{"value": "1.56942794029e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.86589256158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.009": [{"id": "influence", "unit": [{"value": "1.4479145491e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.002": [{"id": "influence", "unit": [{"value": "1.67656432427e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.215988055179", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.010": [{"id": "influence", "unit": [{"value": "1.8198075692e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.49699656158", "key": "score"}]}]} -{"10.1002/bjs.1800770811": [{"id": "influence", "unit": [{"value": "1.67187553969e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0695100183041", "key": "score"}]}]} -{"10.1016/j.bspc.2014.03.012": [{"id": "influence", "unit": [{"value": "1.95532453508e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.515593123169", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.003": [{"id": "influence", "unit": [{"value": "1.40775765812e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0013060694016", "key": 
"score"}]}]} -{"10.1016/j.bspc.2014.03.013": [{"id": "influence", "unit": [{"value": "1.5060128486e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08963656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.001": [{"id": "influence", "unit": [{"value": "1.71744317531e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.18819656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.003": [{"id": "influence", "unit": [{"value": "1.70700554472e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432036561584", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.005": [{"id": "influence", "unit": [{"value": "1.60913042425e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.141247514051", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.004": [{"id": "influence", "unit": [{"value": "1.54004068867e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.176256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.005": [{"id": "influence", "unit": [{"value": "1.43786305861e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.008": [{"id": "influence", "unit": [{"value": "1.59868004966e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.006": [{"id": "influence", "unit": [{"value": "1.51352700749e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.047998630986", "key": "score"}]}]} -{"10.1002/bjs.1800830216": [{"id": "influence", "unit": [{"value": "4.19695308756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.892091916084", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.009": [{"id": "influence", "unit": [{"value": "1.44773774163e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43776", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.010": [{"id": "influence", "unit": [{"value": "1.40286074715e-08", "key": "score"}]}, 
{"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2014.04.011": [{"id": "influence", "unit": [{"value": "1.43452379454e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.20736", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.008": [{"id": "influence", "unit": [{"value": "1.47851454712e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.265595636751", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.001": [{"id": "influence", "unit": [{"value": "1.53184569597e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.14752", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.002": [{"id": "influence", "unit": [{"value": "2.08976321636e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.77312", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.003": [{"id": "influence", "unit": [{"value": "1.5362319803e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4592", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.004": [{"id": "influence", "unit": [{"value": "1.43551360917e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulm.2003.09.009": [{"id": "influence", "unit": [{"value": "2.07662142151e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.17324559877", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.006": [{"id": "influence", "unit": [{"value": "1.38758095618e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1002/bjs.1800770813": [{"id": "influence", "unit": [{"value": "5.43028907368e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43228064039", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.008": [{"id": "influence", "unit": [{"value": "2.48008956791e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.92696023518", "key": "score"}]}]} -{"10.1016/j.bspc.2014.05.007": [{"id": "influence", "unit": 
[{"value": "1.55569630314e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1616", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.003": [{"id": "influence", "unit": [{"value": "1.47652834699e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2048", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.001": [{"id": "influence", "unit": [{"value": "2.34941079685e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.64104412729", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.005": [{"id": "influence", "unit": [{"value": "1.46581848803e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.008": [{"id": "influence", "unit": [{"value": "1.48316418026e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.152", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.009": [{"id": "influence", "unit": [{"value": "1.55159297928e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3684096", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.009": [{"id": "influence", "unit": [{"value": "4.64407583043e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "25.0781621774", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.002": [{"id": "influence", "unit": [{"value": "1.84359097451e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.97892349444", "key": "score"}]}]} -{"10.1016/j.bspc.2014.06.010": [{"id": "influence", "unit": [{"value": "1.45882281845e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.29376", "key": "score"}]}]} -{"10.1002/bjs.1800830217": [{"id": "influence", "unit": [{"value": "2.66857838618e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2939630017", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.001": [{"id": "influence", "unit": [{"value": "1.74339843714e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.47296", "key": "score"}]}]} 
-{"10.1016/j.bspc.2014.07.002": [{"id": "influence", "unit": [{"value": "1.97864177754e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.8432", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.003": [{"id": "influence", "unit": [{"value": "1.43205516236e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.008223399936", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.010": [{"id": "influence", "unit": [{"value": "1.65211302766e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7844096", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.003": [{"id": "influence", "unit": [{"value": "1.70791309048e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1648", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.005": [{"id": "influence", "unit": [{"value": "1.46148049345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32003656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.004": [{"id": "influence", "unit": [{"value": "2.41893049083e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.14408257536", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.004": [{"id": "influence", "unit": [{"value": "1.48940446704e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.349264532144", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.006": [{"id": "influence", "unit": [{"value": "1.46865948655e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7776", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.012": [{"id": "influence", "unit": [{"value": "3.32098166992e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.85721559518", "key": "score"}]}]} -{"10.1002/bjs.1800800838": [{"id": "influence", "unit": [{"value": "1.54541422129e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.614175851545", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.007": [{"id": "influence", "unit": [{"value": "1.86833555051e-08", "key": "score"}]}, {"id": 
"popularity", "unit": [{"value": "5.17539656158", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.006": [{"id": "influence", "unit": [{"value": "1.43480222367e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.010861337641", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.013": [{"id": "influence", "unit": [{"value": "1.73995286966e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2633472", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.009": [{"id": "influence", "unit": [{"value": "1.40352714524e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.600073123169", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.014": [{"id": "influence", "unit": [{"value": "1.49761635864e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.525312", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.011": [{"id": "influence", "unit": [{"value": "1.82224540298e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.38312224634", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.005": [{"id": "influence", "unit": [{"value": "1.45660433811e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.176256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.012": [{"id": "influence", "unit": [{"value": "1.500939189e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.152", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.013": [{"id": "influence", "unit": [{"value": "1.45016686891e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0896", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.006": [{"id": "influence", "unit": [{"value": "3.83122757192e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.90968338432", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.175": [{"id": "influence", "unit": [{"value": "1.62468100526e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.193216", "key": "score"}]}]} -{"10.1002/bjs.1800770814": [{"id": "influence", "unit": 
[{"value": "1.56862848446e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.19369506404e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2014.07.014": [{"id": "influence", "unit": [{"value": "1.51143633444e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.12736", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.001": [{"id": "influence", "unit": [{"value": "1.70044408816e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.496832", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.004": [{"id": "influence", "unit": [{"value": "1.71093837745e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.56323656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.008": [{"id": "influence", "unit": [{"value": "1.97078859418e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.73177645056", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.005": [{"id": "influence", "unit": [{"value": "1.49780415935e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.006": [{"id": "influence", "unit": [{"value": "1.43503066091e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.009": [{"id": "influence", "unit": [{"value": "1.61204862976e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.6192", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.007": [{"id": "influence", "unit": [{"value": "1.62852962137e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.912996561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.010": [{"id": "influence", "unit": [{"value": "1.99662853046e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.311823872", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.009": [{"id": "influence", "unit": [{"value": "1.41070820507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} 
-{"10.1002/bjs.1800830218": [{"id": "influence", "unit": [{"value": "7.96286510199e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.519748681", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.010": [{"id": "influence", "unit": [{"value": "1.5802843193e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.96167312317", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.012": [{"id": "influence", "unit": [{"value": "3.840869763e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.41320899448", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.011": [{"id": "influence", "unit": [{"value": "1.42104594681e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.007": [{"id": "influence", "unit": [{"value": "1.44230090756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0060466176", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.013": [{"id": "influence", "unit": [{"value": "1.5863498967e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.14336", "key": "score"}]}]} -{"10.1016/j.bspc.2014.08.014": [{"id": "influence", "unit": [{"value": "2.31232021273e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.3952", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.013": [{"id": "influence", "unit": [{"value": "1.84917821836e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.62628397056", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.002": [{"id": "influence", "unit": [{"value": "1.48687602794e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.392", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.003": [{"id": "influence", "unit": [{"value": "1.41684524348e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.008": [{"id": "influence", "unit": [{"value": "1.63027919676e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": 
"0.0571237025385", "key": "score"}]}]} -{"10.1002/bjs.1800770815": [{"id": "influence", "unit": [{"value": "3.36222157107e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.136808818018", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.004": [{"id": "influence", "unit": [{"value": "1.95535934787e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.44099656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.06.014": [{"id": "influence", "unit": [{"value": "3.69147584024e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.98048985088", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.005": [{"id": "influence", "unit": [{"value": "1.39845495997e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.002": [{"id": "influence", "unit": [{"value": "1.80513074722e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.02760913374", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.006": [{"id": "influence", "unit": [{"value": "2.03447659693e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.09565256158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.007": [{"id": "influence", "unit": [{"value": "1.62927318462e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.61856", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.008": [{"id": "influence", "unit": [{"value": "1.54581024603e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.93696", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.009": [{"id": "influence", "unit": [{"value": "1.46435880325e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8352", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.003": [{"id": "influence", "unit": [{"value": "2.16395647262e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3727835136", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.010": [{"id": "influence", "unit": [{"value": "1.46868368579e-08", "key": 
"score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1002/bjs.1800830219": [{"id": "influence", "unit": [{"value": "2.50642815265e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0178183460684", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.011": [{"id": "influence", "unit": [{"value": "1.40482513888e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.004": [{"id": "influence", "unit": [{"value": "1.66018178917e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.248957035184", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.012": [{"id": "influence", "unit": [{"value": "1.44499446681e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32963656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.005": [{"id": "influence", "unit": [{"value": "2.13345493462e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08107354112", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.013": [{"id": "influence", "unit": [{"value": "1.76733860818e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.11136", "key": "score"}]}]} -{"10.1016/j.bspc.2014.09.015": [{"id": "influence", "unit": [{"value": "1.67138620507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.04576", "key": "score"}]}]} -{"10.1016/j.cad.2007.07.006": [{"id": "influence", "unit": [{"value": "3.44677020463e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.7206677347", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.002": [{"id": "influence", "unit": [{"value": "1.53338439518e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.489636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.003": [{"id": "influence", "unit": [{"value": "1.4975981112e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1696", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.005": [{"id": "influence", 
"unit": [{"value": "1.54039426793e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.89383312317", "key": "score"}]}]} -{"10.1002/bjs.1800800839": [{"id": "influence", "unit": [{"value": "1.91873295174e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48228758095e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.007": [{"id": "influence", "unit": [{"value": "1.47931059859e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.008": [{"id": "influence", "unit": [{"value": "2.01423051188e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.19054624634", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.001": [{"id": "influence", "unit": [{"value": "1.68693618434e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3580466176", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.009": [{"id": "influence", "unit": [{"value": "1.72914219669e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.9568", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.015": [{"id": "influence", "unit": [{"value": "1.73360913543e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.93641216", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.011": [{"id": "influence", "unit": [{"value": "1.43679218811e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.20736", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.012": [{"id": "influence", "unit": [{"value": "1.78061002271e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.70816", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.002": [{"id": "influence", "unit": [{"value": "1.61820004971e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.45097294234", "key": "score"}]}]} -{"10.1016/j.bspc.2014.10.013": [{"id": "influence", "unit": [{"value": "1.48765552864e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.96", "key": "score"}]}]} 
-{"10.1016/j.bspc.2014.11.001": [{"id": "influence", "unit": [{"value": "1.59737661395e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.05696", "key": "score"}]}]} -{"10.1002/bjs.1800830220": [{"id": "influence", "unit": [{"value": "3.8815829655e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.16238449899", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.016": [{"id": "influence", "unit": [{"value": "2.78066873764e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7056", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.002": [{"id": "influence", "unit": [{"value": "1.45383931482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.003": [{"id": "influence", "unit": [{"value": "1.90267173877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.55363656158", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.004": [{"id": "influence", "unit": [{"value": "1.50843971772e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.859236561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.003": [{"id": "influence", "unit": [{"value": "1.49853112689e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.152200912896", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.005": [{"id": "influence", "unit": [{"value": "1.52771917375e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3488", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.008": [{"id": "influence", "unit": [{"value": "1.45847695161e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.004": [{"id": "influence", "unit": [{"value": "1.42787789848e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.009": [{"id": "influence", "unit": [{"value": "2.10890729982e-08", "key": "score"}]}, {"id": "popularity", "unit": 
[{"value": "9.62816", "key": "score"}]}]} -{"10.1016/j.bspc.2014.11.010": [{"id": "influence", "unit": [{"value": "1.57175886062e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.7216", "key": "score"}]}]} -{"10.1002/bjs.1800770816": [{"id": "influence", "unit": [{"value": "2.71163748513e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.774674369216", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.005": [{"id": "influence", "unit": [{"value": "1.65151993295e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.14121216", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.003": [{"id": "influence", "unit": [{"value": "1.52570480424e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.032", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.006": [{"id": "influence", "unit": [{"value": "1.45383931482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0279936", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.005": [{"id": "influence", "unit": [{"value": "2.71337763508e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "11.55936", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.006": [{"id": "influence", "unit": [{"value": "1.47106510598e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.936", "key": "score"}]}]} -{"10.1016/j.cad.2007.08.007": [{"id": "influence", "unit": [{"value": "1.53777238981e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.06857312256", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.007": [{"id": "influence", "unit": [{"value": "1.40080939677e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.008": [{"id": "influence", "unit": [{"value": "1.89034298199e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.9216", "key": "score"}]}]} -{"10.1016/j.bspc.2014.12.012": [{"id": "influence", "unit": [{"value": "1.66752560965e-08", "key": "score"}]}, {"id": 
"popularity", "unit": [{"value": "2.31936", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.001": [{"id": "influence", "unit": [{"value": "1.46425603437e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.936036561584", "key": "score"}]}]} -{"10.1002/bjs.1800800840": [{"id": "influence", "unit": [{"value": "3.67966558346e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.40760305102", "key": "score"}]}]} -{"10.1016/j.cad.2007.09.002": [{"id": "influence", "unit": [{"value": "1.46236770279e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.002176782336", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.002": [{"id": "influence", "unit": [{"value": "2.23567019421e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.30403656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.003": [{"id": "influence", "unit": [{"value": "1.47732821531e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.004": [{"id": "influence", "unit": [{"value": "1.4209459997e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7296", "key": "score"}]}]} -{"10.1016/j.cad.2007.09.003": [{"id": "influence", "unit": [{"value": "2.71062666759e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.07416229888", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.005": [{"id": "influence", "unit": [{"value": "3.62317493769e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.3009096848", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.007": [{"id": "influence", "unit": [{"value": "1.91660266579e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.64576", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.008": [{"id": "influence", "unit": [{"value": "1.54764640563e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.392", "key": "score"}]}]} -{"10.1016/j.cad.2007.09.004": [{"id": "influence", "unit": [{"value": 
"1.53695181735e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0527026176", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.009": [{"id": "influence", "unit": [{"value": "1.47238808768e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.960036561584", "key": "score"}]}]} -{"10.1002/bjs.1800830221": [{"id": "influence", "unit": [{"value": "1.91362377636e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.850333866433", "key": "score"}]}]} -{"10.1016/j.bspc.2015.01.010": [{"id": "influence", "unit": [{"value": "1.41383950298e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.002": [{"id": "influence", "unit": [{"value": "1.77651789595e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.6436096", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.002": [{"id": "influence", "unit": [{"value": "1.5588275929e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.39776", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.009": [{"id": "influence", "unit": [{"value": "1.620251802e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.121944990745", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.003": [{"id": "influence", "unit": [{"value": "1.61509969244e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.008", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.003": [{"id": "influence", "unit": [{"value": "1.51109944205e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.004353564672", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.005": [{"id": "influence", "unit": [{"value": "1.61390186863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.2752", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.010": [{"id": "influence", "unit": [{"value": "2.34179271519e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.87600155892", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.004": 
[{"id": "influence", "unit": [{"value": "1.75892873601e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.291443982336", "key": "score"}]}]} -{"10.1016/j.bulm.2003.10.011": [{"id": "influence", "unit": [{"value": "1.95530022024e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08646015871", "key": "score"}]}]} -{"10.1002/bjs.1800830222": [{"id": "influence", "unit": [{"value": "1.53993358438e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.046656", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.005": [{"id": "influence", "unit": [{"value": "2.03453090989e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.6413616128", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.006": [{"id": "influence", "unit": [{"value": "1.52048096498e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0609140736", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.001": [{"id": "influence", "unit": [{"value": "1.62846212143e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.741786539105", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.002": [{"id": "influence", "unit": [{"value": "8.49962253632e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "23.8113807648", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.007": [{"id": "influence", "unit": [{"value": "1.77202790365e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.41948928", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.008": [{"id": "influence", "unit": [{"value": "1.66784168106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.72154447872", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.003": [{"id": "influence", "unit": [{"value": "1.91796802049e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.568225265648", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.009": [{"id": "influence", "unit": [{"value": "2.44964116737e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": 
"3.53599721472", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.007": [{"id": "influence", "unit": [{"value": "1.40307547394e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.004": [{"id": "influence", "unit": [{"value": "1.66189630404e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.883877179392", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.1900": [{"id": "influence", "unit": [{"value": "1.41563040465e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.406656", "key": "score"}]}]} -{"10.1002/bjs.1800770817": [{"id": "influence", "unit": [{"value": "2.3414812571e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0769975141418", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.010": [{"id": "influence", "unit": [{"value": "1.5758423276e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2102962176", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.008": [{"id": "influence", "unit": [{"value": "1.48556993479e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.009": [{"id": "influence", "unit": [{"value": "2.44107151341e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.08323656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.011": [{"id": "influence", "unit": [{"value": "1.5461901653e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.98336", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.011": [{"id": "influence", "unit": [{"value": "1.56002105062e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.2387803136", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.012": [{"id": "influence", "unit": [{"value": "3.32215062786e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "15.2602696848", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.005": [{"id": "influence", "unit": [{"value": 
"1.52580829037e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0655418710426", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.017": [{"id": "influence", "unit": [{"value": "2.83150482952e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.18112656317", "key": "score"}]}]} -{"10.1016/j.bspc.2015.02.014": [{"id": "influence", "unit": [{"value": "1.52509378424e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.43786968475", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.012": [{"id": "influence", "unit": [{"value": "2.21262421374e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.43258413056", "key": "score"}]}]} -{"10.1002/bjs.1800830223": [{"id": "influence", "unit": [{"value": "1.61300371139e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0535883391846", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.001": [{"id": "influence", "unit": [{"value": "1.50323968183e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43776", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.006": [{"id": "influence", "unit": [{"value": "1.79329991872e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.22408158121", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.018": [{"id": "influence", "unit": [{"value": "1.76307398045e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.44637696", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.002": [{"id": "influence", "unit": [{"value": "1.56246121871e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.9248", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.003": [{"id": "influence", "unit": [{"value": "1.4724851416e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.013": [{"id": "influence", "unit": [{"value": "2.37373458092e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.60366727168", "key": "score"}]}]} 
-{"10.1016/j.bspc.2015.03.004": [{"id": "influence", "unit": [{"value": "1.63634379092e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.9712", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.007": [{"id": "influence", "unit": [{"value": "1.73920228056e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.77834429874", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.005": [{"id": "influence", "unit": [{"value": "1.44059659429e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.019": [{"id": "influence", "unit": [{"value": "3.3773334495e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.60914034075", "key": "score"}]}]} -{"10.1002/bjs.1800800841": [{"id": "influence", "unit": [{"value": "3.8141878057e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.5575088231", "key": "score"}]}]} -{"10.1016/j.cad.2007.10.014": [{"id": "influence", "unit": [{"value": "2.91623266242e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.58448456158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.006": [{"id": "influence", "unit": [{"value": "1.66124365782e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.3488", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.007": [{"id": "influence", "unit": [{"value": "1.51852019221e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.936036561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.001": [{"id": "influence", "unit": [{"value": "1.60137140701e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.223242448896", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.008": [{"id": "influence", "unit": [{"value": "1.53291950533e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.bulm.2003.11.008": [{"id": "influence", "unit": [{"value": "1.90840217859e-08", "key": "score"}]}, {"id": "popularity", "unit": 
[{"value": "1.24232596567", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.021": [{"id": "influence", "unit": [{"value": "3.02280680133e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.009505792", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.009": [{"id": "influence", "unit": [{"value": "1.54105672704e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.92", "key": "score"}]}]} -{"10.1016/j.bspc.2015.03.011": [{"id": "influence", "unit": [{"value": "1.42787789848e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.003": [{"id": "influence", "unit": [{"value": "1.95168880935e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.12353159168", "key": "score"}]}]} -{"10.1002/bjs.1800830224": [{"id": "influence", "unit": [{"value": "1.71803713801e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.078672354467", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.001": [{"id": "influence", "unit": [{"value": "1.51853931787e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.13221580699", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.001": [{"id": "influence", "unit": [{"value": "1.61872367055e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.864", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.022": [{"id": "influence", "unit": [{"value": "2.32774512842e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.97683016158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.002": [{"id": "influence", "unit": [{"value": "1.5366499674e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.19939656158", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.004": [{"id": "influence", "unit": [{"value": "2.73418800808e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.24225581534", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.003": [{"id": "influence", "unit": [{"value": 
"1.51579974618e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3056", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.023": [{"id": "influence", "unit": [{"value": "1.59696268486e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.814327296", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.002": [{"id": "influence", "unit": [{"value": "1.61076734804e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.348928244122", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.004": [{"id": "influence", "unit": [{"value": "2.01234339106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.6544", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.005": [{"id": "influence", "unit": [{"value": "3.32115282661e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.32880456158", "key": "score"}]}]} -{"10.1002/bjs.1800770818": [{"id": "influence", "unit": [{"value": "5.68780647438e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1338624097", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.005": [{"id": "influence", "unit": [{"value": "1.59419814162e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0656", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.024": [{"id": "influence", "unit": [{"value": "1.76193844694e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.793627857584", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.003": [{"id": "influence", "unit": [{"value": "1.57560737594e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0326025021358", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.006": [{"id": "influence", "unit": [{"value": "1.42561558413e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.489636561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.006": [{"id": "influence", "unit": [{"value": "1.52287071881e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.79776", "key": "score"}]}]} 
-{"10.1016/j.bspc.2015.04.007": [{"id": "influence", "unit": [{"value": "1.50665203883e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.38336", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.10.025": [{"id": "influence", "unit": [{"value": "1.71087944907e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08354376158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.008": [{"id": "influence", "unit": [{"value": "1.46951625461e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.648036561584", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.007": [{"id": "influence", "unit": [{"value": "2.32915485084e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3229169705", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.004": [{"id": "influence", "unit": [{"value": "2.36578941835e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.53322412737", "key": "score"}]}]} -{"10.1002/bjs.1800830225": [{"id": "influence", "unit": [{"value": "1.62214790144e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00742227195463", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.009": [{"id": "influence", "unit": [{"value": "1.56048905668e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.416", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.001": [{"id": "influence", "unit": [{"value": "1.48348846065e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.202873856", "key": "score"}]}]} -{"10.1016/j.bulm.2003.12.005": [{"id": "influence", "unit": [{"value": "2.30836821787e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.60349958079", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.008": [{"id": "influence", "unit": [{"value": "1.70229830807e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.84174336", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.010": [{"id": "influence", "unit": [{"value": "1.59830110366e-08", "key": "score"}]}, {"id": 
"popularity", "unit": [{"value": "1.63779656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.002": [{"id": "influence", "unit": [{"value": "1.93327034893e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.56053985758", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.009": [{"id": "influence", "unit": [{"value": "1.91549602574e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.36680192", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.001": [{"id": "influence", "unit": [{"value": "1.45033751388e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.02378336256", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.012": [{"id": "influence", "unit": [{"value": "1.41990928722e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.013": [{"id": "influence", "unit": [{"value": "1.41756265041e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1002/bjs.1800770820": [{"id": "influence", "unit": [{"value": "3.02434333285e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.498025265422", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.014": [{"id": "influence", "unit": [{"value": "2.87658919937e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.1904", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.002": [{"id": "influence", "unit": [{"value": "1.98851018342e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.05410185468", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.010": [{"id": "influence", "unit": [{"value": "3.31135702336e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.38206042112", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.004": [{"id": "influence", "unit": [{"value": "1.93237516379e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.268672", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.015": [{"id": "influence", "unit": 
[{"value": "1.5940790328e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7136", "key": "score"}]}]} -{"10.1016/j.bspc.2015.04.016": [{"id": "influence", "unit": [{"value": "1.42020748002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.003": [{"id": "influence", "unit": [{"value": "1.57227629941e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.443832547738", "key": "score"}]}]} -{"10.1016/j.cad.2007.11.011": [{"id": "influence", "unit": [{"value": "1.66228240489e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.55587117056", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.001": [{"id": "influence", "unit": [{"value": "1.74463551248e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.3856", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.002": [{"id": "influence", "unit": [{"value": "1.68432360071e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.0976", "key": "score"}]}]} -{"10.1002/bjs.1800830226": [{"id": "influence", "unit": [{"value": "1.41634774979e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.89730223054e-06", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.004": [{"id": "influence", "unit": [{"value": "1.51058289962e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.059532876841", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.003": [{"id": "influence", "unit": [{"value": "1.41460571706e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.3456", "key": "score"}]}]} -{"10.1016/j.cad.2007.12.001": [{"id": "influence", "unit": [{"value": "2.11112110818e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.79394605056", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.004": [{"id": "influence", "unit": [{"value": "1.51932372739e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.921636561584", "key": "score"}]}]} 
-{"10.1016/j.bspc.2015.05.005": [{"id": "influence", "unit": [{"value": "1.54252858635e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.68256", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.006": [{"id": "influence", "unit": [{"value": "1.63811523553e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.46730968475", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.005": [{"id": "influence", "unit": [{"value": "1.79905502333e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4531700736", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.007": [{"id": "influence", "unit": [{"value": "1.86500741891e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.9392", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.008": [{"id": "influence", "unit": [{"value": "1.95218690528e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.87203656158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.01.006": [{"id": "influence", "unit": [{"value": "2.11480233946e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.09623861953", "key": "score"}]}]} -{"10.1002/bjs.1800800842": [{"id": "influence", "unit": [{"value": "3.19483524181e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.815376816948", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.011": [{"id": "influence", "unit": [{"value": "1.65687177507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.95939656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.012": [{"id": "influence", "unit": [{"value": "1.81235414386e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.14407312317", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.013": [{"id": "influence", "unit": [{"value": "1.58278476219e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.9056", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.014": [{"id": "influence", "unit": [{"value": "2.49273287575e-08", "key": "score"}]}, {"id": "popularity", 
"unit": [{"value": "10.1731565616", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.001": [{"id": "influence", "unit": [{"value": "7.17973794162e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "19.0059453231", "key": "score"}]}]} -{"10.1016/j.bspc.2015.05.015": [{"id": "influence", "unit": [{"value": "1.44480835961e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.15552", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.001": [{"id": "influence", "unit": [{"value": "2.03198801445e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.15207312317", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.002": [{"id": "influence", "unit": [{"value": "1.52151117534e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1213056", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.002": [{"id": "influence", "unit": [{"value": "1.47038759381e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.4896", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.003": [{"id": "influence", "unit": [{"value": "1.68691283611e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.0496", "key": "score"}]}]} -{"10.1002/bjs.1800830228": [{"id": "influence", "unit": [{"value": "3.4727344925e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.52941705521", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.004": [{"id": "influence", "unit": [{"value": "1.50566905194e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.32003656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.008": [{"id": "influence", "unit": [{"value": "1.49587208412e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.08", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.003": [{"id": "influence", "unit": [{"value": "1.47097833801e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00891997028352", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.009": [{"id": "influence", "unit": [{"value": 
"1.7180686851e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.31367312317", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.012": [{"id": "influence", "unit": [{"value": "1.59031474173e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.392", "key": "score"}]}]} -{"10.1016/j.bspc.2015.06.014": [{"id": "influence", "unit": [{"value": "1.46562526844e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.56003656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.001": [{"id": "influence", "unit": [{"value": "2.17323207482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.032", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.004": [{"id": "influence", "unit": [{"value": "1.44142980291e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.002176782336", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.002": [{"id": "influence", "unit": [{"value": "1.62780811555e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7376", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.003": [{"id": "influence", "unit": [{"value": "1.49732150645e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03779656158", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.1907": [{"id": "influence", "unit": [{"value": "1.44064265002e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.23328", "key": "score"}]}]} -{"10.1002/bjs.1800770822": [{"id": "influence", "unit": [{"value": "2.53706984013e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.16406102839", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.004": [{"id": "influence", "unit": [{"value": "2.03252752514e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.23683656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.07.006": [{"id": "influence", "unit": [{"value": "1.41406819359e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.31231688013e-05", "key": "score"}]}]} 
-{"10.1016/j.bspc.2015.07.007": [{"id": "influence", "unit": [{"value": "1.4134548894e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.005": [{"id": "influence", "unit": [{"value": "1.92096996879e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.798504402748", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.001": [{"id": "influence", "unit": [{"value": "1.52571226108e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.5792", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.002": [{"id": "influence", "unit": [{"value": "1.92313873921e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.216", "key": "score"}]}]} -{"10.1016/j.cad.2007.12.003": [{"id": "influence", "unit": [{"value": "1.47833902387e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.478656", "key": "score"}]}]} -{"10.1016/j.bulm.2004.02.006": [{"id": "influence", "unit": [{"value": "1.99942885863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.58838221914", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.003": [{"id": "influence", "unit": [{"value": "1.45383931482e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.004": [{"id": "influence", "unit": [{"value": "1.74136707644e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.82727312317", "key": "score"}]}]} -{"10.1002/bjs.1800800843": [{"id": "influence", "unit": [{"value": "1.90295946113e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0467163704882", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.005": [{"id": "influence", "unit": [{"value": "1.66535805466e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4976", "key": "score"}]}]} -{"10.1016/j.bspc.2015.08.006": [{"id": "influence", "unit": [{"value": "1.55929432612e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": 
"1.032", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.001": [{"id": "influence", "unit": [{"value": "1.67638603843e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.335936546243", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.002": [{"id": "influence", "unit": [{"value": "2.54499796895e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.95687312317", "key": "score"}]}]} -{"10.1016/j.cad.2007.12.004": [{"id": "influence", "unit": [{"value": "1.68358939362e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.181941313536", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.003": [{"id": "influence", "unit": [{"value": "2.03781614756e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.25603656158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.002": [{"id": "influence", "unit": [{"value": "3.20375349871e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.16853148467", "key": "score"}]}]} -{"10.1016/j.cad.2007.12.006": [{"id": "influence", "unit": [{"value": "1.58270393634e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.988509696", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.004": [{"id": "influence", "unit": [{"value": "1.59625617331e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.004": [{"id": "influence", "unit": [{"value": "6.04960747641e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.6734049382", "key": "score"}]}]} -{"10.1002/bjs.1800830229": [{"id": "influence", "unit": [{"value": "1.55959196631e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.641264975971", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.005": [{"id": "influence", "unit": [{"value": "1.40353242616e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.002": [{"id": "influence", "unit": [{"value": "1.71408534078e-08", "key": 
"score"}]}, {"id": "popularity", "unit": [{"value": "1.2718196736", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.008": [{"id": "influence", "unit": [{"value": "1.63434633767e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.33923656158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.005": [{"id": "influence", "unit": [{"value": "1.98027269671e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.610709669315", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.009": [{"id": "influence", "unit": [{"value": "1.42215367623e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.006": [{"id": "influence", "unit": [{"value": "1.66410516017e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.429584309322", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.010": [{"id": "influence", "unit": [{"value": "1.46447099277e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.600073123169", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.003": [{"id": "influence", "unit": [{"value": "1.67426183375e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3258493952", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.011": [{"id": "influence", "unit": [{"value": "1.56363960352e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.1056", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.004": [{"id": "influence", "unit": [{"value": "3.59749667999e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.23073974272", "key": "score"}]}]} -{"10.1002/bjs.1800770823": [{"id": "influence", "unit": [{"value": "6.6892359492e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.53423462924", "key": "score"}]}]} -{"10.1016/j.bspc.2015.09.012": [{"id": "influence", "unit": [{"value": "1.88010720036e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.71459656158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.007": [{"id": 
"influence", "unit": [{"value": "1.96574804476e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.9119619865", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.002": [{"id": "influence", "unit": [{"value": "1.44345474828e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1296", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.003": [{"id": "influence", "unit": [{"value": "1.57853853228e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.936036561584", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.005": [{"id": "influence", "unit": [{"value": "1.5461465729e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7057536", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.007": [{"id": "influence", "unit": [{"value": "1.62379020748e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.04", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.008": [{"id": "influence", "unit": [{"value": "1.59401878593e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.980407076684", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.006": [{"id": "influence", "unit": [{"value": "4.46655442013e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.70952494013", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.005": [{"id": "influence", "unit": [{"value": "2.23161161778e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.0879872", "key": "score"}]}]} -{"10.1016/j.bulm.2004.03.009": [{"id": "influence", "unit": [{"value": "2.1096199285e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.48547788882", "key": "score"}]}]} -{"10.1002/bjs.1800830230": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.008": [{"id": "influence", "unit": [{"value": "1.46111297696e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.078656", "key": "score"}]}]} 
-{"10.1016/j.bulm.2004.04.001": [{"id": "influence", "unit": [{"value": "3.00706733095e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.46231057538", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.006": [{"id": "influence", "unit": [{"value": "1.6439374667e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3439026176", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.009": [{"id": "influence", "unit": [{"value": "2.69719564856e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.0302952489", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.002": [{"id": "influence", "unit": [{"value": "1.52911014415e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0349070606991", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.007": [{"id": "influence", "unit": [{"value": "1.85263367415e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.8166528", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.010": [{"id": "influence", "unit": [{"value": "1.45469401165e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0228427776", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.003": [{"id": "influence", "unit": [{"value": "1.89493519627e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.24009994797", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.008": [{"id": "influence", "unit": [{"value": "2.59517459944e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.915704832", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.011": [{"id": "influence", "unit": [{"value": "1.86046459817e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.727650816", "key": "score"}]}]} -{"10.1002/bjs.1800800844": [{"id": "influence", "unit": [{"value": "1.57047800382e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000479472211999", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.004": [{"id": "influence", "unit": [{"value": "2.60177802378e-08", "key": 
"score"}]}, {"id": "popularity", "unit": [{"value": "0.636079226881", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.008": [{"id": "influence", "unit": [{"value": "1.90122612426e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.2208", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.011": [{"id": "influence", "unit": [{"value": "2.08997502877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.17767312317", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.012": [{"id": "influence", "unit": [{"value": "2.05274022656e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.29705189376", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.012": [{"id": "influence", "unit": [{"value": "1.59881289625e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.992", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.009": [{"id": "influence", "unit": [{"value": "2.03099988159e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.38091049917", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.005": [{"id": "influence", "unit": [{"value": "1.43984594063e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.036216999936", "key": "score"}]}]} -{"10.1016/j.bspc.2015.10.013": [{"id": "influence", "unit": [{"value": "1.43047895994e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.013": [{"id": "influence", "unit": [{"value": "1.67511747031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.74549811678", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.001": [{"id": "influence", "unit": [{"value": "1.58287888635e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1296", "key": "score"}]}]} -{"10.1002/bjs.1800770824": [{"id": "influence", "unit": [{"value": "1.74510094006e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000532336899516", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.004": [{"id": 
"influence", "unit": [{"value": "1.8157272332e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.4112", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.014": [{"id": "influence", "unit": [{"value": "1.97688539752e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.58003306974", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.010": [{"id": "influence", "unit": [{"value": "1.62301306315e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.08803656158", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.011": [{"id": "influence", "unit": [{"value": "1.53599734551e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.5248", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.012": [{"id": "influence", "unit": [{"value": "1.46274993269e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.360036561584", "key": "score"}]}]} -{"10.1016/j.bulm.2004.04.006": [{"id": "influence", "unit": [{"value": "1.77932635596e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.01969180672", "key": "score"}]}]} -{"10.1016/j.cad.2008.01.015": [{"id": "influence", "unit": [{"value": "4.42373196686e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.2695436288", "key": "score"}]}]} -{"10.1016/j.bspc.2015.11.013": [{"id": "influence", "unit": [{"value": "1.5216135019e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.705636561584", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.001": [{"id": "influence", "unit": [{"value": "1.51955420807e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.2848", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.002": [{"id": "influence", "unit": [{"value": "2.05886786946e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2938775552", "key": "score"}]}]} -{"10.1002/bjs.1800830231": [{"id": "influence", "unit": [{"value": "1.69616413352e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.682679489185", "key": 
"score"}]}]} -{"10.1016/j.bulm.2004.05.004": [{"id": "influence", "unit": [{"value": "1.52306827774e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00129038820994", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.004": [{"id": "influence", "unit": [{"value": "1.72175768323e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.04963656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.003": [{"id": "influence", "unit": [{"value": "2.00163763167e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.95973175296", "key": "score"}]}]} -{"10.1016/j.bulm.2004.05.005": [{"id": "influence", "unit": [{"value": "1.96690672988e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.743240150286", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.005": [{"id": "influence", "unit": [{"value": "1.56332373874e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.616", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.004": [{"id": "influence", "unit": [{"value": "2.68914111801e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.9750194176", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.007": [{"id": "influence", "unit": [{"value": "1.61462321853e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.648", "key": "score"}]}]} -{"10.1016/j.bulm.2004.05.006": [{"id": "influence", "unit": [{"value": "1.43605288287e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0101142575844", "key": "score"}]}]} -{"10.1016/j.bspc.2015.12.009": [{"id": "influence", "unit": [{"value": "1.49796866189e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.005": [{"id": "influence", "unit": [{"value": "1.42085717449e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.01679616", "key": "score"}]}]} -{"10.1002/bjs.1800770825": [{"id": "influence", "unit": [{"value": "1.72515289261e-08", "key": "score"}]}, {"id": 
"popularity", "unit": [{"value": "0.423529266594", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.001": [{"id": "influence", "unit": [{"value": "1.73560539135e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7808", "key": "score"}]}]} -{"10.1016/j.bulm.2004.05.008": [{"id": "influence", "unit": [{"value": "1.44769579767e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0101508191688", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.006": [{"id": "influence", "unit": [{"value": "3.09614821228e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2833864704", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.002": [{"id": "influence", "unit": [{"value": "1.40768568577e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.007": [{"id": "influence", "unit": [{"value": "1.90393509315e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.05185556958", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.001": [{"id": "influence", "unit": [{"value": "1.54298421438e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.054879399936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.004": [{"id": "influence", "unit": [{"value": "1.46115744106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7056", "key": "score"}]}]} -{"10.1016/j.cad.2008.02.008": [{"id": "influence", "unit": [{"value": "2.09592527715e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.555173012144", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.003": [{"id": "influence", "unit": [{"value": "1.57772239494e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0383067109786", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.010": [{"id": "influence", "unit": [{"value": "2.04492895381e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.07702548958", "key": "score"}]}]} -{"10.1002/bjs.1800800845": [{"id": "influence", 
"unit": [{"value": "2.05111097877e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.748931806934", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.005": [{"id": "influence", "unit": [{"value": "1.5181247267e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7296", "key": "score"}]}]} -{"10.1016/j.cad.2008.03.001": [{"id": "influence", "unit": [{"value": "2.22361376969e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.20860082176", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.010": [{"id": "influence", "unit": [{"value": "2.15762004908e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1616", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.004": [{"id": "influence", "unit": [{"value": "2.45299305703e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.34412580667", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.011": [{"id": "influence", "unit": [{"value": "1.66637169233e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.071177216", "key": "score"}]}]} -{"10.1016/j.cad.2008.03.002": [{"id": "influence", "unit": [{"value": "2.12247153179e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.91892109312", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.011": [{"id": "influence", "unit": [{"value": "1.70824299336e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.79534624634", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.005": [{"id": "influence", "unit": [{"value": "4.73383952013e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.6509988749", "key": "score"}]}]} -{"10.1016/j.cad.2008.03.003": [{"id": "influence", "unit": [{"value": "1.81700367369e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.516087296", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.012": [{"id": "influence", "unit": [{"value": "1.49995724598e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.450233856", "key": 
"score"}]}]} -{"10.1001/archpediatrics.2012.196": [{"id": "influence", "unit": [{"value": "1.41421399515e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.046656", "key": "score"}]}]} -{"10.1002/bjs.1800830232": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bspc.2016.01.013": [{"id": "influence", "unit": [{"value": "1.4427952674e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216036561584", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.001": [{"id": "influence", "unit": [{"value": "1.47539204854e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0527026176", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.006": [{"id": "influence", "unit": [{"value": "2.97311157113e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.5246956352", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.003": [{"id": "influence", "unit": [{"value": "1.40070897441e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.013": [{"id": "influence", "unit": [{"value": "2.4911500399e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "14.9569268832", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.006": [{"id": "influence", "unit": [{"value": "1.71702229619e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.1216", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.004": [{"id": "influence", "unit": [{"value": "3.05797070316e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.96109521374", "key": "score"}]}]} -{"10.1016/j.bulm.2004.06.007": [{"id": "influence", "unit": [{"value": "2.09569501031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.69190753748", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.007": [{"id": "influence", "unit": [{"value": "2.18615295637e-08", "key": "score"}]}, {"id": 
"popularity", "unit": [{"value": "4.60487312317", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.017": [{"id": "influence", "unit": [{"value": "1.5747753576e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.43566336", "key": "score"}]}]} -{"10.1002/bjs.1800770826": [{"id": "influence", "unit": [{"value": "1.42464622401e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.02349036908e-06", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.005": [{"id": "influence", "unit": [{"value": "1.8512784976e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2022628352", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.008": [{"id": "influence", "unit": [{"value": "1.90646183065e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.616", "key": "score"}]}]} -{"10.1016/j.bulm.2004.07.001": [{"id": "influence", "unit": [{"value": "1.65178100351e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.018756889674", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.018": [{"id": "influence", "unit": [{"value": "2.10323641562e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.426622976", "key": "score"}]}]} -{"10.1016/j.bspc.2016.02.009": [{"id": "influence", "unit": [{"value": "1.58797981286e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.41123656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.006": [{"id": "influence", "unit": [{"value": "2.31648466547e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7242642432", "key": "score"}]}]} -{"10.1016/j.bulm.2004.07.004": [{"id": "influence", "unit": [{"value": "2.48984766568e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.88448775304", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.019": [{"id": "influence", "unit": [{"value": "1.52267206004e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.6039936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.03.001": [{"id": 
"influence", "unit": [{"value": "1.6956111114e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.23363656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.05.007": [{"id": "influence", "unit": [{"value": "1.74859637039e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.689467392", "key": "score"}]}]} -{"10.1002/bjs.1800830233": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.11.020": [{"id": "influence", "unit": [{"value": "2.19142078922e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.73152", "key": "score"}]}]} -{"10.1016/j.bspc.2016.03.002": [{"id": "influence", "unit": [{"value": "1.49353114625e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4592", "key": "score"}]}]} -{"10.1016/j.bulm.2004.07.005": [{"id": "influence", "unit": [{"value": "2.00631619007e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.45052106975", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.002": [{"id": "influence", "unit": [{"value": "1.80280869063e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.33571041758", "key": "score"}]}]} -{"10.1016/j.bspc.2016.04.001": [{"id": "influence", "unit": [{"value": "1.47432609805e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.bulm.2004.07.006": [{"id": "influence", "unit": [{"value": "1.64580561258e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.926956302336", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.001": [{"id": "influence", "unit": [{"value": "1.9386600371e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.392983761584", "key": "score"}]}]} -{"10.1016/j.bspc.2016.04.002": [{"id": "influence", "unit": [{"value": "1.77732240919e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} 
-{"10.1016/j.bulm.2004.08.001": [{"id": "influence", "unit": [{"value": "3.65276112913e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.34267874689", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.002": [{"id": "influence", "unit": [{"value": "1.67228822033e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.059357696", "key": "score"}]}]} -{"10.1002/bjs.1800800846": [{"id": "influence", "unit": [{"value": "1.87426454575e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000824941606699", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.003": [{"id": "influence", "unit": [{"value": "3.08990827031e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1503314944", "key": "score"}]}]} -{"10.1016/j.bspc.2016.04.004": [{"id": "influence", "unit": [{"value": "1.55323901681e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.89607312317", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.004": [{"id": "influence", "unit": [{"value": "3.21801438375e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.8529477632", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.003": [{"id": "influence", "unit": [{"value": "1.59273285306e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.175314287002", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.001": [{"id": "influence", "unit": [{"value": "1.4242561143e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.003": [{"id": "influence", "unit": [{"value": "2.43015778095e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.805398016", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.002": [{"id": "influence", "unit": [{"value": "1.76239322363e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.544", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.004": [{"id": "influence", "unit": [{"value": "1.54566569959e-08", "key": "score"}]}, {"id": 
"popularity", "unit": [{"value": "1.33416696732", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.005": [{"id": "influence", "unit": [{"value": "2.31299690805e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.32781914112", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.004": [{"id": "influence", "unit": [{"value": "2.17857956905e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.86970993118", "key": "score"}]}]} -{"10.1002/bjs.1800830234": [{"id": "influence", "unit": [{"value": "1.93299516968e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.410114635488", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.003": [{"id": "influence", "unit": [{"value": "1.53173355172e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.416", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.006": [{"id": "influence", "unit": [{"value": "2.99072062287e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.99053536189", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.005": [{"id": "influence", "unit": [{"value": "2.06889594575e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.256080896", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.004": [{"id": "influence", "unit": [{"value": "2.36226956363e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "12.4432", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.007": [{"id": "influence", "unit": [{"value": "3.14923020973e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.691949568", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.005": [{"id": "influence", "unit": [{"value": "2.03490674279e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.76807312317", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.006": [{"id": "influence", "unit": [{"value": "1.97919772747e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.8754523136", "key": "score"}]}]} -{"10.1016/j.cad.2008.06.008": [{"id": "influence", 
"unit": [{"value": "2.25742850662e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.10430813595", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.006": [{"id": "influence", "unit": [{"value": "1.78325655416e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.24007312317", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.001": [{"id": "influence", "unit": [{"value": "1.94166917577e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.134853739184", "key": "score"}]}]} -{"10.1002/bjs.1800770827": [{"id": "influence", "unit": [{"value": "1.76320623916e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.254133214525", "key": "score"}]}]} -{"10.1016/j.bspc.2016.05.009": [{"id": "influence", "unit": [{"value": "1.7698892709e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.136", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.007": [{"id": "influence", "unit": [{"value": "2.01560366604e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.606272", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.002": [{"id": "influence", "unit": [{"value": "3.17149463925e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.09275075038", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.001": [{"id": "influence", "unit": [{"value": "1.53072410751e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.896", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.008": [{"id": "influence", "unit": [{"value": "1.4541483793e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0060466176", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.003": [{"id": "influence", "unit": [{"value": "1.40933402967e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.003": [{"id": "influence", "unit": [{"value": "2.9979841587e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.926787936944", "key": "score"}]}]} 
-{"10.1016/j.bspc.2016.06.004": [{"id": "influence", "unit": [{"value": "1.40645973e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.009": [{"id": "influence", "unit": [{"value": "1.7132748437e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.362432721584", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.004": [{"id": "influence", "unit": [{"value": "2.50164812326e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.88192223232", "key": "score"}]}]} -{"10.1002/bjs.1800800847": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.005": [{"id": "influence", "unit": [{"value": "1.74665165591e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.4816", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.005": [{"id": "influence", "unit": [{"value": "2.45696962307e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.99574209203", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.005": [{"id": "influence", "unit": [{"value": "1.59559459456e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.50693363712", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.010": [{"id": "influence", "unit": [{"value": "1.48583201429e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.056733696", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.006": [{"id": "influence", "unit": [{"value": "1.52398390667e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.156473856", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.006": [{"id": "influence", "unit": [{"value": "1.4411957679e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.006": [{"id": "influence", "unit": [{"value": "2.02062687478e-08", "key": "score"}]}, {"id": "popularity", "unit": 
[{"value": "1.43986364416", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.007": [{"id": "influence", "unit": [{"value": "1.4074035125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.011": [{"id": "influence", "unit": [{"value": "1.85417577652e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1984587776", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.009": [{"id": "influence", "unit": [{"value": "1.75006832271e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.833871622963", "key": "score"}]}]} -{"10.1002/bjs.1800830235": [{"id": "influence", "unit": [{"value": "4.57466352678e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.34267976794", "key": "score"}]}]} -{"10.1016/j.cad.2008.07.007": [{"id": "influence", "unit": [{"value": "1.6746709597e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.207396561584", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.010": [{"id": "influence", "unit": [{"value": "1.5121257956e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816036561584", "key": "score"}]}]} -{"10.1016/j.bulm.2004.08.010": [{"id": "influence", "unit": [{"value": "1.80911809391e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.00599210096", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.012": [{"id": "influence", "unit": [{"value": "1.69114717345e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.291776", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.001": [{"id": "influence", "unit": [{"value": "2.10035086605e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2657166336", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.012": [{"id": "influence", "unit": [{"value": "1.69779333017e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.72", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.002": [{"id": "influence", "unit": [{"value": 
"1.78162325028e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.1503188566", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.003": [{"id": "influence", "unit": [{"value": "1.65953020168e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.5175936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.013": [{"id": "influence", "unit": [{"value": "1.65255327378e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.88007312317", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.014": [{"id": "influence", "unit": [{"value": "1.4134548894e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1002/bjs.1800770828": [{"id": "influence", "unit": [{"value": "1.86514259401e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.833593408536", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.003": [{"id": "influence", "unit": [{"value": "1.97562905655e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.55319395228", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.004": [{"id": "influence", "unit": [{"value": "2.15409428027e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.509453312", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.015": [{"id": "influence", "unit": [{"value": "1.57786086387e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.92963656158", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.016": [{"id": "influence", "unit": [{"value": "1.50216812068e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.4496", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.004": [{"id": "influence", "unit": [{"value": "1.43075299138e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00683025924096", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.005": [{"id": "influence", "unit": [{"value": "1.91398146851e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0908104751844", "key": "score"}]}]} 
-{"10.1016/j.bspc.2016.06.017": [{"id": "influence", "unit": [{"value": "1.43750163699e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.005": [{"id": "influence", "unit": [{"value": "1.85585291498e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.456078732756", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.006": [{"id": "influence", "unit": [{"value": "2.51055931908e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.33129063357", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.018": [{"id": "influence", "unit": [{"value": "1.56828824848e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.49603656158", "key": "score"}]}]} -{"10.1002/bjs.1800830236": [{"id": "influence", "unit": [{"value": "2.10974073898e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0634013861571", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.007": [{"id": "influence", "unit": [{"value": "1.57639934401e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.7862962176", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.006": [{"id": "influence", "unit": [{"value": "1.7374230549e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.2753957615", "key": "score"}]}]} -{"10.1016/j.bspc.2016.06.020": [{"id": "influence", "unit": [{"value": "1.84666648997e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.752", "key": "score"}]}]} -{"10.1016/j.cad.2008.08.008": [{"id": "influence", "unit": [{"value": "2.15821065694e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.2964569088", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.008": [{"id": "influence", "unit": [{"value": "1.56115937004e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0796495967688", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.002": [{"id": "influence", "unit": [{"value": "1.47691612934e-08", "key": "score"}]}, {"id": 
"popularity", "unit": [{"value": "0.216", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.001": [{"id": "influence", "unit": [{"value": "1.45887434945e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1575936", "key": "score"}]}]} -{"10.1016/j.bulm.2004.09.009": [{"id": "influence", "unit": [{"value": "4.57902670991e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.11395509574", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.003": [{"id": "influence", "unit": [{"value": "1.71436278381e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.392", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.002": [{"id": "influence", "unit": [{"value": "1.5539682507e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.2289586176", "key": "score"}]}]} -{"10.1001/archpediatrics.2012.199": [{"id": "influence", "unit": [{"value": "1.71476015775e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.90165576158", "key": "score"}]}]} -{"10.1002/bjs.1800800849": [{"id": "influence", "unit": [{"value": "3.09530027559e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.49082768433", "key": "score"}]}]} -{"10.1016/j.bulm.2004.10.001": [{"id": "influence", "unit": [{"value": "1.55886217247e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.023277059113", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.003": [{"id": "influence", "unit": [{"value": "1.57617414217e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.424633856", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.005": [{"id": "influence", "unit": [{"value": "1.95192226147e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.224", "key": "score"}]}]} -{"10.1016/j.bulm.2004.10.003": [{"id": "influence", "unit": [{"value": "1.92367459178e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.528195031565", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.004": [{"id": "influence", "unit": [{"value": 
"2.17373591838e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.37798875136", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.006": [{"id": "influence", "unit": [{"value": "1.61114213421e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.312", "key": "score"}]}]} -{"10.1016/j.bulm.2004.10.004": [{"id": "influence", "unit": [{"value": "2.33702357833e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.63762464588", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.005": [{"id": "influence", "unit": [{"value": "2.69563597746e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.2866942976", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.008": [{"id": "influence", "unit": [{"value": "1.73096493179e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.1216", "key": "score"}]}]} -{"10.1016/j.bulm.2004.10.005": [{"id": "influence", "unit": [{"value": "1.50907350045e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.102595742761", "key": "score"}]}]} -{"10.1002/bjs.1800830237": [{"id": "influence", "unit": [{"value": "2.31106043214e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.210221241405", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.010": [{"id": "influence", "unit": [{"value": "1.91545566337e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.072", "key": "score"}]}]} -{"10.1016/j.cad.2008.09.006": [{"id": "influence", "unit": [{"value": "1.59244027261e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.49161517056", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.011": [{"id": "influence", "unit": [{"value": "1.50397644161e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.001": [{"id": "influence", "unit": [{"value": "1.94763030991e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.32440162861", "key": "score"}]}]} 
-{"10.1016/j.bspc.2016.07.014": [{"id": "influence", "unit": [{"value": "1.48124323768e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.432", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.002": [{"id": "influence", "unit": [{"value": "3.00494323511e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.4027693056", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.002": [{"id": "influence", "unit": [{"value": "2.53857687295e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.74619584076", "key": "score"}]}]} -{"10.1016/j.bspc.2016.07.015": [{"id": "influence", "unit": [{"value": "1.57036055242e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.632", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.003": [{"id": "influence", "unit": [{"value": "1.80864486244e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.19054592", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.003": [{"id": "influence", "unit": [{"value": "1.64757827667e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.185750234726", "key": "score"}]}]} -{"10.1002/bjs.1800770830": [{"id": "influence", "unit": [{"value": "2.1717988111e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.00307941311092", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.002": [{"id": "influence", "unit": [{"value": "1.57522435727e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.53603656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.004": [{"id": "influence", "unit": [{"value": "2.20693385362e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.53278259678", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.008": [{"id": "influence", "unit": [{"value": "1.46628397755e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.004": [{"id": "influence", "unit": [{"value": "1.44180245944e-08", "key": "score"}]}, {"id": "popularity", "unit": 
[{"value": "0.004353564672", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.005": [{"id": "influence", "unit": [{"value": "4.21350036174e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.3617120256", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.010": [{"id": "influence", "unit": [{"value": "1.57228551928e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.992", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.013": [{"id": "influence", "unit": [{"value": "1.40919153834e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.816", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.006": [{"id": "influence", "unit": [{"value": "1.67836187004e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.06976", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.005": [{"id": "influence", "unit": [{"value": "1.6429360297e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0871710356608", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.014": [{"id": "influence", "unit": [{"value": "1.47004185386e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.432073123169", "key": "score"}]}]} -{"10.1002/bjs.1800800850": [{"id": "influence", "unit": [{"value": "1.45655493484e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.47799794434e-05", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.007": [{"id": "influence", "unit": [{"value": "1.45629653212e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1057536", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.006": [{"id": "influence", "unit": [{"value": "2.32289086552e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.45411454376", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.015": [{"id": "influence", "unit": [{"value": "1.44553926616e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.176", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.013": [{"id": "influence", "unit": [{"value": "2.60304743482e-08", 
"key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.23790336", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.008": [{"id": "influence", "unit": [{"value": "1.55644587328e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.614071296", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.018": [{"id": "influence", "unit": [{"value": "1.7716798502e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.98403656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.009": [{"id": "influence", "unit": [{"value": "1.56852779265e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.064945152", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.019": [{"id": "influence", "unit": [{"value": "1.4051013807e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.36", "key": "score"}]}]} -{"10.1016/j.buildenv.2009.12.014": [{"id": "influence", "unit": [{"value": "4.16520176181e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.18018171358", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.007": [{"id": "influence", "unit": [{"value": "1.42012974149e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.000470184984576", "key": "score"}]}]} -{"10.1002/bjs.1800830238": [{"id": "influence", "unit": [{"value": "2.35702875882e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.961022410653", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.020": [{"id": "influence", "unit": [{"value": "1.81191933705e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.76", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.010": [{"id": "influence", "unit": [{"value": "2.2300724133e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.901495296", "key": "score"}]}]} -{"10.1016/j.bulm.2004.11.008": [{"id": "influence", "unit": [{"value": "3.26455792748e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.40855210787", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.001": 
[{"id": "influence", "unit": [{"value": "1.95265402863e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.289997312", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.023": [{"id": "influence", "unit": [{"value": "1.44610848556e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.011": [{"id": "influence", "unit": [{"value": "1.56438579362e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.633424896", "key": "score"}]}]} -{"10.1016/j.bspc.2016.08.024": [{"id": "influence", "unit": [{"value": "1.52455960767e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.416", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.001": [{"id": "influence", "unit": [{"value": "1.40652400555e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.01679616", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.002": [{"id": "influence", "unit": [{"value": "1.81296293653e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.0816256", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.012": [{"id": "influence", "unit": [{"value": "1.86407798247e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.6482226176", "key": "score"}]}]} -{"10.1002/bjs.1800770831": [{"id": "influence", "unit": [{"value": "2.33499424669e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.224565660204", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.002": [{"id": "influence", "unit": [{"value": "1.48710792066e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.112", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.002": [{"id": "influence", "unit": [{"value": "1.84385083234e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.3555308798", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.003": [{"id": "influence", "unit": [{"value": "1.82138256221e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.79170816", "key": 
"score"}]}]} -{"10.1016/j.cad.2008.10.014": [{"id": "influence", "unit": [{"value": "3.47023350086e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.33522248158", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.003": [{"id": "influence", "unit": [{"value": "1.7243704389e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.73603656158", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.004": [{"id": "influence", "unit": [{"value": "1.96614244154e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.64679752158", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.003": [{"id": "influence", "unit": [{"value": "1.84849867761e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.866660424745", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.005": [{"id": "influence", "unit": [{"value": "2.23601105505e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.32003656158", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.015": [{"id": "influence", "unit": [{"value": "2.05692337575e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.12908712414", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.007": [{"id": "influence", "unit": [{"value": "2.41073729092e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "10.1440365616", "key": "score"}]}]} -{"10.1002/bjs.1800830239": [{"id": "influence", "unit": [{"value": "3.46552953627e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.1368777166", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.005": [{"id": "influence", "unit": [{"value": "4.3579701733e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.64887277434", "key": "score"}]}]} -{"10.1016/j.cad.2008.10.016": [{"id": "influence", "unit": [{"value": "1.82885876549e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.767503872", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.004": [{"id": "influence", "unit": [{"value": "1.40616171935e-08", "key": 
"score"}]}, {"id": "popularity", "unit": [{"value": "0.008223399936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.008": [{"id": "influence", "unit": [{"value": "2.00650284933e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.25607312317", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.001": [{"id": "influence", "unit": [{"value": "2.63357101584e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.79492096", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.006": [{"id": "influence", "unit": [{"value": "1.47565045883e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.227515392", "key": "score"}]}]} -{"10.1016/j.bulm.2004.12.005": [{"id": "influence", "unit": [{"value": "2.33561886021e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.30555116965", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.009": [{"id": "influence", "unit": [{"value": "1.75755379814e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.688", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.003": [{"id": "influence", "unit": [{"value": "3.1616178764e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.11116001758", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.007": [{"id": "influence", "unit": [{"value": "2.38387637008e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.18710016", "key": "score"}]}]} -{"10.1002/bjs.1800800851": [{"id": "influence", "unit": [{"value": "1.38460887125e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0", "key": "score"}]}]} -{"10.1016/j.bulm.2005.01.001": [{"id": "influence", "unit": [{"value": "1.42647191702e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.03162157056", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.010": [{"id": "influence", "unit": [{"value": "1.63573718026e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.88", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.006": [{"id": "influence", 
"unit": [{"value": "1.87221293034e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576571392", "key": "score"}]}]} -{"10.1016/j.bulm.2005.01.002": [{"id": "influence", "unit": [{"value": "4.55573629147e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "14.7926573464", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.011": [{"id": "influence", "unit": [{"value": "1.64394416726e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.136", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.007": [{"id": "influence", "unit": [{"value": "1.50436953162e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.134493696", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.008": [{"id": "influence", "unit": [{"value": "1.98956540239e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.282046161584", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.014": [{"id": "influence", "unit": [{"value": "1.53911323715e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.576036561584", "key": "score"}]}]} -{"10.1016/j.bulm.2005.01.003": [{"id": "influence", "unit": [{"value": "1.89561182084e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.52232193858", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.008": [{"id": "influence", "unit": [{"value": "1.41417356353e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0279936", "key": "score"}]}]} -{"10.1002/bjs.1800770832": [{"id": "influence", "unit": [{"value": "1.81095076918e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.0101951064951", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.009": [{"id": "influence", "unit": [{"value": "2.56761001538e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.050598912", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.015": [{"id": "influence", "unit": [{"value": "1.63966184918e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.136", "key": 
"score"}]}]} -{"10.1016/j.cad.2008.11.009": [{"id": "influence", "unit": [{"value": "5.09401674186e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.51791044608", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.016": [{"id": "influence", "unit": [{"value": "1.46967592725e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.536", "key": "score"}]}]} -{"10.1016/j.buildenv.2010.01.010": [{"id": "influence", "unit": [{"value": "2.84315984215e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.649660052144", "key": "score"}]}]} -{"10.1016/j.cad.2008.11.010": [{"id": "influence", "unit": [{"value": "1.45603710668e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.1575936", "key": "score"}]}]} -{"10.1016/j.bspc.2016.09.017": [{"id": "influence", "unit": [{"value": "1.51152226412e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.65615844006e-05", "key": "score"}]}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json new file mode 100644 index 000000000..5dcf486d0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json @@ -0,0 +1,4 @@ +{"50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1": [{"id": "influence", "unit": [{"value": "6.63451994567e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.348694533145", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.16094680115e-09", "key": "score"}]}]} +{"50|dedup_wf_001::05b1f8ce98702f69d07aa5f0429de1e3": [{"id": "influence", "unit": [{"value": "6.25057357279e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "7.0208", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.40234462244e-08", "key": "score"}]}]} 
+{"50|dedup_wf_001::08823c8f5c3ca2eae523817036cdda67": [{"id": "influence", "unit": [{"value": "5.54921449123e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.53012887452e-10", "key": "score"}]}]} +{"50|dedup_wf_001::0e72b399325d6efcbe3271891a1dfe4c": [{"id": "influence", "unit": [{"value": "1.63466096315e-08", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "20.9870879741", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.49501495323e-08", "key": "score"}]}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json deleted file mode 100644 index e5e70cb27..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json +++ /dev/null @@ -1,18 +0,0 @@ -{"author":[{"fullname":"Niskala, Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2cfa9f434e854612c7cbdeb43433ac24","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 
57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} -{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7bba8d87fe65db9b20219f5d3ed6e7c5","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 
."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} -{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::810ab952d864911e203aaa1a6350e297","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 
10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} -{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::8b1f6bc7e8243f4438937be16e76d8d0","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 
5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} -{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 
260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} -{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, 
Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. 
Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. 
The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.006"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , 
Laaksonen , P , Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal 
article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} -{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} -{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, 
W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. 
A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_cl
assification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} -{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, 
A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} -{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} -{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","s
chemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} -{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, 
Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. 
Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss"
:"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen 
Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} -{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica 
Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica 
Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} -{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. 
Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} -{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. 
ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia 
Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book 
Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} -{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. 
Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} -{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} -{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, 
Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. 
En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. 
Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic 
non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie 
non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup 
control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric 
reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json deleted file mode 100644 index f849811ab..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json +++ /dev/null @@ -1,18 +0,0 @@ -{"author":[{"fullname":"Niskala, 
Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2cfa9f434e854612c7cbdeb43433ac24","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 
57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} -{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7bba8d87fe65db9b20219f5d3ed6e7c5","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 
."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} -{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::810ab952d864911e203aaa1a6350e297","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 
10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} -{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::8b1f6bc7e8243f4438937be16e76d8d0","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 
5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} -{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 
260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} -{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, 
Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. 
Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. 
The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.006"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value": 
"10.1016/j.buildenv.2010.01.008"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , Laaksonen , P , Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal 
article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} -{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} -{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, 
W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. 
A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_cl
assification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} -{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, 
A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} -{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} -{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","s
chemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} -{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, 
Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. 
Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss"
:"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen 
Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} -{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica 
Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica 
Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} -{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. 
Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} -{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. 
ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia 
Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book 
Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} -{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. 
Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} -{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} -{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, 
Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. 
En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. 
Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic 
non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie 
non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup 
control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric 
reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json deleted file mode 100644 index 521f4959c..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json +++ /dev/null @@ -1,18 +0,0 @@ -{"author":[{"fullname":"Niskala, 
Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2cfa9f434e854612c7cbdeb43433ac24","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 
57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} -{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7bba8d87fe65db9b20219f5d3ed6e7c5","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 
."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} -{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::810ab952d864911e203aaa1a6350e297","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 
10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} -{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::8b1f6bc7e8243f4438937be16e76d8d0","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 
5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} -{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 
260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} -{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, 
Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. 
Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. 
The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.006"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , 
Laaksonen , P , Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal 
article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} -{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} -{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, 
W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. 
A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_cl
assification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} -{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, 
A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} -{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} -{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","s
chemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} -{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, 
Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. 
Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss"
:"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen 
Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} -{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica 
Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica 
Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} -{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. 
Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} -{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. 
ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia 
Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book 
Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} -{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. 
Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} -{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.buildenv.2010.01.008"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} -{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, 
Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. 
En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. 
Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic 
non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie 
non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup 
control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric 
reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json index 1b46a3d25..a8221324f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json +++ 
b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json @@ -1,38 +1,39 @@ -{"doi":"10.3390/s18072310","level1":"engineering and technology","level2":"nano-technology","level3":"nanoscience & nanotechnology"} -{"doi":"10.1111/1365-2656.12831\u000210.17863/cam.24369","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} -{"doi":"10.3929/ethz-b-000187584\u000210.1002/chem.201701644","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.1080/01913123.2017.1367361","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} -{"doi":"10.1051/e3sconf/20199207011","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"environmental sciences"} -{"doi":"10.1038/onc.2015.333","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} -{"doi":"10.1093/mnras/staa256","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} -{"doi":"10.1016/j.jclepro.2018.07.166","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} -{"doi":"10.1103/physrevlett.125.037403","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"10.1080/03602532.2017.1316285","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.1001/jamanetworkopen.2019.1868","level1":"medical and health sciences","level2":"other medical science","level3":"health policy & services"} -{"doi":"10.1128/mra.00874-18","level1":"natural sciences","level2":"biological sciences","level3":"plant biology & botany"} -{"doi":"10.1016/j.nancom.2018.03.001","level1":"engineering and technology","level2":"NULL","level3":"NULL"} -{"doi":"10.1112/topo.12174","level1":"natural sciences","level2":"NULL","level3":"NULL"} 
-{"doi":"10.12688/wellcomeopenres.15846.1","level1":"medical and health sciences","level2":"health sciences","level3":"NULL"} -{"doi":"10.21468/scipostphys.3.1.001","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} -{"doi":"10.1088/1741-4326/ab6c77","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"10.1109/tpwrs.2019.2944747","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} -{"doi":"10.1016/j.expthermflusci.2019.109994\u000210.17863/cam.46212","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"} -{"doi":"10.1109/tc.2018.2860012","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"computer hardware & architecture"} -{"doi":"10.1002/mma.6622","level1":"natural sciences","level2":"mathematics","level3":"numerical & computational mathematics"} -{"doi":"10.1051/radiopro/2020020","level1":"natural sciences","level2":"chemical sciences","level3":"NULL"} -{"doi":"10.1007/s12268-019-1003-4","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"10.3390/cancers12010236","level1":"medical and health sciences","level2":"health sciences","level3":"biochemistry & molecular biology"} -{"doi":"10.6084/m9.figshare.9912614\u000210.6084/m9.figshare.9912614.v1\u000210.1080/00268976.2019.1665199","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} -{"doi":"10.1175/jpo-d-17-0239.1","level1":"natural sciences","level2":"biological sciences","level3":"marine biology & hydrobiology"} -{"doi":"10.1007/s13218-020-00674-7","level1":"engineering and technology","level2":"industrial biotechnology","level3":"industrial engineering & automation"} 
-{"doi":"10.1016/j.psyneuen.2016.02.003\u000210.1016/j.psyneuen.2016.02.00310.7892/boris.78886\u000210.7892/boris.78886","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"10.1109/ted.2018.2813542","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} -{"doi":"10.3989/scimar.04739.25a","level1":"natural sciences","level2":"biological sciences","level3":"NULL"} -{"doi":"10.3390/su12187503","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"NULL"} -{"doi":"10.1016/j.ccell.2018.08.017","level1":"medical and health sciences","level2":"basic medicine","level3":"biochemistry & molecular biology"} -{"doi":"10.1103/physrevresearch.2.023322","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"10.1039/c8cp03234c","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"10.5281/zenodo.3696557\u000210.5281/zenodo.3696556\u000210.1109/jsac.2016.2545384","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} -{"doi":"10.1038/ng.3667\u000210.1038/ng.3667.\u000210.17615/tct6-4m26\u000210.17863/cam.15649","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} -{"doi":"10.1016/j.jclepro.2019.119065","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} -{"doi":"10.1111/pce.13392","level1":"agricultural and veterinary sciences","level2":"agriculture, forestry, and fisheries","level3":"agronomy & agriculture"} \ No newline at end of file +{"doi":"10.1080/1536383x.2020.1868997","level1":"02 engineering and technology","level2":"0210 nano-technology","level3":"021001 nanoscience & nanotechnology"} 
+{"doi":"10.1080/1536383x.2020.1868997","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1186/s40425-019-0732-8","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis"} +{"doi":"10.1186/s40425-019-0732-8","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1007/s10482-021-01529-3","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1007/s10482-021-01529-3","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030306 microbiology"} +{"doi":"10.1155/2021/6643273","level1":"01 natural sciences","level2":"0103 physical sciences","level3":"010301 acoustics"} +{"doi":"10.1155/2021/6643273","level1":"02 engineering and technology","level2":"0209 industrial biotechnology","level3":"020901 industrial engineering & automation"} +{"doi":"10.12737/article_5d6613dbf2ad51.82646096","level1":"02 engineering and technology","level2":"0210 nano-technology","level3":"021001 nanoscience & nanotechnology"} +{"doi":"10.12737/article_5d6613dbf2ad51.82646096","level1":"01 natural sciences","level2":"0103 physical sciences","level3":"010302 applied physics"} +{"doi":"10.1216/jie.2020.32.457","level1":"01 natural sciences","level2":"0101 mathematics","level3":"010101 applied mathematics"} +{"doi":"10.1216/jie.2020.32.457","level1":"01 natural sciences","level2":"0101 mathematics","level3":"010102 general mathematics"} +{"doi":"10.3934/naco.2021021","level1":"02 engineering and technology","level2":"0211 other engineering and technologies","level3":"021103 operations research"} +{"doi":"10.3934/naco.2021021","level1":"02 engineering and technology","level2":"0209 industrial biotechnology","level3":"020901 industrial engineering & automation"} 
+{"doi":"10.1080/1034912x.2021.1910933","level1":"05 social sciences","level2":"050301 education","level3":"050301 education"} +{"doi":"10.1080/1034912x.2021.1910933","level1":"05 social sciences","level2":"0501 psychology and cognitive sciences","level3":"050104 developmental & child psychology"} +{"doi":"10.1016/j.rtbm.2020.100596","level1":"05 social sciences","level2":"0502 economics and business","level3":"050211 marketing"} +{"doi":"10.1016/j.rtbm.2020.100596","level1":"05 social sciences","level2":"0502 economics and business","level3":"050212 sport, leisure & tourism"} +{"doi":"10.14807/ijmp.v11i8.1220","level1":"05 social sciences","level2":"0502 economics and business","level3":"050211 marketing"} +{"doi":"10.14807/ijmp.v11i8.1220","level1":"05 social sciences","level2":"0502 economics and business","level3":"050203 business & management"} +{"doi":"10.1007/s13205-020-02415-x","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030304 developmental biology"} +{"doi":"10.1007/s13205-020-02415-x","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030302 biochemistry & molecular biology"} +{"doi":"10.3390/s18072310","level1":"04 agricultural and veterinary sciences","level2":"0404 agricultural biotechnology","level3":"040502 food science"} +{"doi":"10.3390/s18072310","level1":"03 medical and health sciences","level2":"0303 health sciences","level3":"030309 nutrition & dietetics"} +{"doi":"10.1063/5.0032658","level1":"01 natural sciences","level2":"0103 physical sciences","level3":"010304 chemical physics"} +{"doi":"10.1063/5.0032658","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1145/3411174.3411195","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020201 artificial intelligence & image processing"} 
+{"doi":"10.1145/3411174.3411195","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020206 networking & telecommunications"} +{"doi":"10.1021/acs.joc.0c02755","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010405 organic chemistry"} +{"doi":"10.1021/acs.joc.0c02755","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1002/jcp.28608","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis"} +{"doi":"10.1002/jcp.28608","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1097/cmr.0000000000000579","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis"} +{"doi":"10.1097/cmr.0000000000000579","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030304 developmental biology"} +{"doi":"10.1007/s11164-020-04383-6","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010405 organic chemistry"} +{"doi":"10.1007/s11164-020-04383-6","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry"} +{"doi":"10.1016/j.actpsy.2020.103155","level1":"05 social sciences","level2":"0501 psychology and cognitive sciences","level3":"050105 experimental psychology"} +{"doi":"10.1016/j.actpsy.2020.103155","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030217 neurology & neurosurgery"} +{"doi":"10.1109/memea49120.2020.9137187","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020208 electrical & electronic engineering"} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv new file mode 100644 index 000000000..c5a2a821a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.csv @@ -0,0 +1,39 @@ +10.1080/1536383x.2020.1868997,02 engineering and technology,0210 nano-technology,021001 nanoscience & nanotechnology +10.1080/1536383x.2020.1868997,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1186/s40425-019-0732-8,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis +10.1186/s40425-019-0732-8,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1007/s10482-021-01529-3,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1007/s10482-021-01529-3,03 medical and health sciences,0301 basic medicine,030306 microbiology +10.1155/2021/6643273,01 natural sciences,0103 physical sciences,010301 acoustics +10.1155/2021/6643273,02 engineering and technology,0209 industrial biotechnology,020901 industrial engineering & automation +10.12737/article_5d6613dbf2ad51.82646096,02 engineering and technology,0210 nano-technology,021001 nanoscience & nanotechnology +10.12737/article_5d6613dbf2ad51.82646096,01 natural sciences,0103 physical sciences,010302 applied physics +10.1216/jie.2020.32.457,01 natural sciences,0101 mathematics,010101 applied mathematics +10.1216/jie.2020.32.457,01 natural sciences,0101 mathematics,010102 general mathematics +10.3934/naco.2021021,02 engineering and technology,0211 other engineering and technologies,021103 operations research +10.3934/naco.2021021,02 engineering and technology,0209 industrial biotechnology,020901 industrial engineering & automation +10.1080/1034912x.2021.1910933,05 social 
sciences,050301 education,050301 education +10.1080/1034912x.2021.1910933,05 social sciences,0501 psychology and cognitive sciences,050104 developmental & child psychology +10.1016/j.rtbm.2020.100596,05 social sciences,0502 economics and business,050211 marketing +10.1016/j.rtbm.2020.100596,05 social sciences,0502 economics and business,"050212 sport, leisure & tourism" +10.14807/ijmp.v11i8.1220,05 social sciences,0502 economics and business,050211 marketing +10.14807/ijmp.v11i8.1220,05 social sciences,0502 economics and business,050203 business & management +10.1007/s13205-020-02415-x,03 medical and health sciences,0303 health sciences,030304 developmental biology +10.1007/s13205-020-02415-x,03 medical and health sciences,0303 health sciences,030302 biochemistry & molecular biology +10.3390/foods10040865,04 agricultural and veterinary sciences,0404 agricultural biotechnology,040502 food science +10.3390/foods10040865,03 medical and health sciences,0303 health sciences,030309 nutrition & dietetics +10.1063/5.0032658,01 natural sciences,0103 physical sciences,010304 chemical physics +10.1063/5.0032658,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1145/3411174.3411195,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020201 artificial intelligence & image processing +10.1145/3411174.3411195,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020206 networking & telecommunications +10.1021/acs.joc.0c02755,01 natural sciences,0104 chemical sciences,010405 organic chemistry +10.1021/acs.joc.0c02755,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1002/jcp.28608,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis +10.1002/jcp.28608,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1097/cmr.0000000000000579,03 medical and health sciences,0302 
clinical medicine,030220 oncology & carcinogenesis +10.1097/cmr.0000000000000579,03 medical and health sciences,0301 basic medicine,030304 developmental biology +10.1007/s11164-020-04383-6,01 natural sciences,0104 chemical sciences,010405 organic chemistry +10.1007/s11164-020-04383-6,01 natural sciences,0104 chemical sciences,010402 general chemistry +10.1016/j.actpsy.2020.103155,05 social sciences,0501 psychology and cognitive sciences,050105 experimental psychology +10.1016/j.actpsy.2020.103155,03 medical and health sciences,0302 clinical medicine,030217 neurology & neurosurgery +10.1109/memea49120.2020.9137187,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020208 electrical & electronic engineering \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv deleted file mode 100644 index e874353e8..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv +++ /dev/null @@ -1,38 +0,0 @@ -dedup_wf_001::ddcc7a56fa13e49bcc59c6bdd19ad26c 10.3390/s18072310 engineering and technology nano-technology nanoscience & nanotechnology -dedup_wf_001::b76062d56e28224eac56111a4e1e5ecf 10.1111/1365-2656.1283110.17863/cam.24369 social sciences psychology and cognitive sciences NULL -dedup_wf_001::bb752acb8f403a25fa7851a302f7b7ac 10.3929/ethz-b-00018758410.1002/chem.201701644 natural sciences NULL NULL -dedup_wf_001::2f1435a9201ecf5cbbcb12c9b2d971cd 10.1080/01913123.2017.1367361 medical and health sciences clinical medicine oncology & carcinogenesis -dedup_wf_001::fc9e47ec16c67b101724320d4b030514 10.1051/e3sconf/20199207011 natural sciences earth and related environmental sciences environmental sciences 
-dedup_wf_001::caa1e5b4de387cb31751552f4f0f5d72 10.1038/onc.2015.333 medical and health sciences clinical medicine oncology & carcinogenesis -dedup_wf_001::c2a98df5637d69bf0524eaf40fe6bf11 10.1093/mnras/staa256 natural sciences physical sciences NULL -dedup_wf_001::c221262bdc77cbfd59859a402f0e3991 10.1016/j.jclepro.2018.07.166 engineering and technology other engineering and technologies building & construction -doiboost____::d56d9dc21f317b3e009d5b6c8ea87212 10.1103/physrevlett.125.037403 natural sciences physical sciences nuclear & particles physics -dedup_wf_001::8a7269c8ee6470b2fb4fd384bc389e08 10.1080/03602532.2017.1316285 natural sciences NULL NULL -dedup_wf_001::28342ebbc19833e4e1f4a2b23cf5ee20 10.1001/jamanetworkopen.2019.1868 medical and health sciences other medical science health policy & services -dedup_wf_001::c1e1daf2b55dd9ec8e1c7c7458bbc7bc 10.1128/mra.00874-18 natural sciences biological sciences plant biology & botany -dedup_wf_001::a2ef4a2720c71907180750e5871298ef 10.1016/j.nancom.2018.03.001 engineering and technology NULL NULL -dedup_wf_001::676f46a31519e83a89efcb1c626286fb 10.1112/topo.12174 natural sciences NULL NULL -dedup_wf_001::6f2761642f1e39313388e2c4060657dd 10.12688/wellcomeopenres.15846.1 medical and health sciences health sciences NULL -dedup_wf_001::e414c1dec599521a9635a60de0f6755b 10.21468/scipostphys.3.1.001 natural sciences physical sciences NULL -dedup_wf_001::f3395fe0f330164ea424dc61c86c9a3d 10.1088/1741-4326/ab6c77 natural sciences physical sciences nuclear & particles physics -dedup_wf_001::a4f32a97a783117012f1de11797e73f2 10.1109/tpwrs.2019.2944747 engineering and technology electrical engineering, electronic engineering, information engineering electrical & electronic engineering -dedup_wf_001::313ae1cd083ae1696d12dd1909f97df8 10.1016/j.expthermflusci.2019.10999410.17863/cam.46212 engineering and technology mechanical engineering mechanical engineering & transports -dedup_wf_001::2a300a7d3ca7347791ebcef986bc0682 
10.1109/tc.2018.2860012 engineering and technology electrical engineering, electronic engineering, information engineering computer hardware & architecture -doiboost____::5b79bd7bd9f87361b4a4abc3cbb2df75 10.1002/mma.6622 natural sciences mathematics numerical & computational mathematics -dedup_wf_001::6a3f61f217a2519fbaddea1094e3bfc2 10.1051/radiopro/2020020 natural sciences chemical sciences NULL -dedup_wf_001::a3f0430309a639f4234a0e57b10f2dee 10.1007/s12268-019-1003-4 medical and health sciences basic medicine NULL -dedup_wf_001::b6b8a3a1cccbee459cf3343485efdb12 10.3390/cancers12010236 medical and health sciences health sciences biochemistry & molecular biology -dedup_wf_001::dd06ee7974730e7b09a4f03c83b3f9bd 10.6084/m9.figshare.991261410.6084/m9.figshare.9912614.v110.1080/00268976.2019.1665199 natural sciences chemical sciences physical chemistry -dedup_wf_001::027c78bef6f972b5e26dfea55d30fbe3 10.1175/jpo-d-17-0239.1 natural sciences biological sciences marine biology & hydrobiology -dedup_wf_001::43edc179aa9e1fbaf582c5203b18b519 10.1007/s13218-020-00674-7 engineering and technology industrial biotechnology industrial engineering & automation -dedup_wf_001::e7770e11cd6eb514bb52c07b5a8a80f0 10.1016/j.psyneuen.2016.02.00310.1016/j.psyneuen.2016.02.00310.7892/boris.7888610.7892/boris.78886 medical and health sciences basic medicine NULL -dedup_wf_001::80bc15d69bdc589149631f3439dde5aa 10.1109/ted.2018.2813542 engineering and technology electrical engineering, electronic engineering, information engineering electrical & electronic engineering -dedup_wf_001::42c1cfa33e7872944b920cff90f4d99e 10.3989/scimar.04739.25a natural sciences biological sciences NULL -dedup_wf_001::9bacdbbaa9da3658b7243d5de8e3ce14 10.3390/su12187503 natural sciences earth and related environmental sciences NULL -dedup_wf_001::59e43d3527dcfecb6097fbd5740c8950 10.1016/j.ccell.2018.08.017 medical and health sciences basic medicine biochemistry & molecular biology 
-doiboost____::e024d1b738df3b24bc58fa0228542571 10.1103/physrevresearch.2.023322 natural sciences physical sciences nuclear & particles physics -dedup_wf_001::66e9a3237fa8178886d26d3c2d5b9e66 10.1039/c8cp03234c natural sciences NULL NULL -dedup_wf_001::83737ab4205bae751571bb3b166efa18 10.5281/zenodo.369655710.5281/zenodo.369655610.1109/jsac.2016.2545384 engineering and technology electrical engineering, electronic engineering, information engineering networking & telecommunications -dedup_wf_001::e3f892db413a689e572dd256acad55fe 10.1038/ng.366710.1038/ng.3667.10.17615/tct6-4m2610.17863/cam.15649 medical and health sciences health sciences genetics & heredity -dedup_wf_001::14ba594e8fd081847bc3f50f56335003 10.1016/j.jclepro.2019.119065 engineering and technology other engineering and technologies building & construction -dedup_wf_001::08ac7b33a41bcea2d055ecd8585d632e 10.1111/pce.13392 agricultural and veterinary sciences agriculture, forestry, and fisheries agronomy & agriculture \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json new file mode 100644 index 000000000..59d707177 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg.json @@ -0,0 +1,37 @@ +{"doi":"10.1001/amaguidesnewsletters.2019.mayjun02","sbj":"10. No inequality"} +{"doi":"10.1001/amaguidesnewsletters.2019.novdec01","sbj":"10. No inequality"} +{"doi":"10.1001/amaguidesnewsletters.2019.sepoct02","sbj":"3. Good health"} +{"doi":"10.1001/amaguidesnewsletters.2019.sepoct02","sbj":"8. Economic growth"} +{"doi":"10.1001/amaguidesnewsletters.2020.janfeb01","sbj":"8. Economic growth"} +{"doi":"10.1001/amaguidesnewsletters.2020.janfeb02","sbj":"3. 
Good health"} +{"doi":"10.1001/amaguidesnewsletters.2020.janfeb02","sbj":"8. Economic growth"} +{"doi":"10.1001/amaguidesnewsletters.2020.julaug01","sbj":"3. Good health"} +{"doi":"10.1001/amaguidesnewsletters.2020.marapr01","sbj":"3. Good health"} +{"doi":"10.1001/amaguidesnewsletters.2020.mayjun01","sbj":"3. Good health"} +{"doi":"10.1001/amaguidesnewsletters.2020.mayjun02","sbj":"16. Peace & justice"} +{"doi":"10.1001/amaguidesnewsletters.2020.mayjun02","sbj":"10. No inequality"} +{"doi":"10.1001/amaguidesnewsletters.2021.julaug01","sbj":"1. No poverty"} +{"doi":"10.1001/amaguidesnewsletters.2021.mayjune01","sbj":"10. No inequality"} +{"doi":"10.1001/amaguidesnewsletters.2021.mayjune02","sbj":"10. No inequality"} +{"doi":"10.4336/2021.pfb.41e201902078","sbj":"15. Life on land"} +{"doi":"10.4337/ejeep.2019.00045","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2019.00050","sbj":"1. No poverty"} +{"doi":"10.4337/ejeep.2019.0045","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2019.0050","sbj":"1. No poverty"} +{"doi":"10.4337/ejeep.2019.0051","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2019.0052","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2020.0058","sbj":"1. No poverty"} +{"doi":"10.4337/ejeep.2020.0058","sbj":"10. No inequality"} +{"doi":"10.4337/ejeep.2020.0060","sbj":"10. No inequality"} +{"doi":"10.4337/ejeep.2020.0065","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2020.02.03","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2020.02.05","sbj":"8. Economic growth"} +{"doi":"10.4337/ejeep.2020.02.06","sbj":"16. Peace & justice"} +{"doi":"10.4337/ejeep.2020.02.09","sbj":"16. Peace & justice"} +{"doi":"10.4337/roke.2020.01.01","sbj":"16. Peace & justice"} +{"doi":"10.4337/roke.2020.01.03","sbj":"16. Peace & justice"} +{"doi":"10.4337/roke.2020.01.05","sbj":"1. No poverty"} +{"doi":"10.4337/roke.2020.01.05","sbj":"8. Economic growth"} +{"doi":"10.4337/roke.2020.01.07","sbj":"8. 
Economic growth"} +{"doi":"10.4337/roke.2020.02.03","sbj":"8. Economic growth"} +{"doi":"10.3390/s18072310","sbj":"1. No poverty"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg_sbs.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg_sbs.csv new file mode 100644 index 000000000..30524467e --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/sdg/sdg_sbs.csv @@ -0,0 +1,37 @@ +10.1001/amaguidesnewsletters.2019.mayjun02,10. No inequality +10.1001/amaguidesnewsletters.2019.novdec01,10. No inequality +10.1001/amaguidesnewsletters.2019.sepoct02,3. Good health +10.1001/amaguidesnewsletters.2019.sepoct02,8. Economic growth +10.1001/amaguidesnewsletters.2020.janfeb01,8. Economic growth +10.1001/amaguidesnewsletters.2020.janfeb02,3. Good health +10.1001/amaguidesnewsletters.2020.janfeb02,8. Economic growth +10.1001/amaguidesnewsletters.2020.julaug01,3. Good health +10.1001/amaguidesnewsletters.2020.marapr01,3. Good health +10.1001/amaguidesnewsletters.2020.mayjun01,3. Good health +10.1001/amaguidesnewsletters.2020.mayjun02,16. Peace & justice +10.1001/amaguidesnewsletters.2020.mayjun02,10. No inequality +10.1001/amaguidesnewsletters.2021.julaug01,1. No poverty +10.1001/amaguidesnewsletters.2021.mayjune01,10. No inequality +10.1001/amaguidesnewsletters.2021.mayjune02,10. No inequality +10.4336/2021.pfb.41e201902078,15. Life on land +10.4337/ejeep.2019.00045,16. Peace & justice +10.4337/ejeep.2019.00050,1. No poverty +10.4337/ejeep.2019.0045,16. Peace & justice +10.4337/ejeep.2019.0050,1. No poverty +10.4337/ejeep.2019.0051,16. Peace & justice +10.4337/ejeep.2019.0052,16. Peace & justice +10.4337/ejeep.2020.0058,1. No poverty +10.4337/ejeep.2020.0058,10. No inequality +10.4337/ejeep.2020.0060,10. No inequality +10.4337/ejeep.2020.0065,16. 
Peace & justice +10.4337/ejeep.2020.02.03,16. Peace & justice +10.4337/ejeep.2020.02.05,8. Economic growth +10.4337/ejeep.2020.02.06,16. Peace & justice +10.4337/ejeep.2020.02.09,16. Peace & justice +10.4337/roke.2020.01.01,16. Peace & justice +10.4337/roke.2020.01.03,16. Peace & justice +10.4337/roke.2020.01.05,1. No poverty +10.4337/roke.2020.01.05,8. Economic growth +10.4337/roke.2020.01.07,8. Economic growth +10.4337/roke.2020.02.03,8. Economic growth +10.4337/roke.2020.02.04,1. No poverty \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset/part-00000.parquet b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset/part-00000.parquet new file mode 100644 index 000000000..ee59c325e Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/dataset/part-00000.parquet differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/part-00000-b05c4abb-77f8-4059-91c0-5521309823f8-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/part-00000-b05c4abb-77f8-4059-91c0-5521309823f8-c000.json.gz new file mode 100644 index 000000000..c55dcd71c Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/part-00000-b05c4abb-77f8-4059-91c0-5521309823f8-c000.json.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/_SUCCESS 
b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/part-00000-6831e1e6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/part-00000-6831e1e6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz new file mode 100644 index 000000000..ae7886e8c Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/part-00000-6831e1e6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/part-00000-2ec9f31c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/part-00000-2ec9f31c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz new file mode 100644 index 000000000..837401919 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/part-00000-2ec9f31c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git 
a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/part-00000-2ba3f17b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/part-00000-2ba3f17b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz new file mode 100644 index 000000000..0436b10ff Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/part-00000-2ba3f17b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00000-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00000-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz new file mode 100644 index 000000000..001322f84 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00000-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00061-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00061-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz new file mode 100644 index 000000000..12968af39 Binary files /dev/null and 
b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00061-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 deleted file mode 100644 index d93d6fd99..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 +++ /dev/null @@ -1,8 +0,0 @@ -oci,citing,cited,creation,timespan,journal_sc,author_sc -02001000007362801000805046300010563030608046333-0200101010136193701050501630209010637020000083700020400083733,10.1007/s10854-015-3684-x,10.1111/j.1551-2916.2008.02408.x,2015-09-01,P7Y2M,no,no -02001000007362801000805046300010563030608046333-02001000007362801000805046300010463020101046309,10.1007/s10854-015-3684-x,10.1007/s10854-014-2114-9,2015-09-01,P1Y2M4D,yes,no -02001000007362801000805046300010563030608046333-020010001063619371214271022182329370200010337000937000609,10.1007/s10854-015-3684-x,10.1016/j.ceramint.2013.09.069,2015-09-01,P1Y6M,no,no -02001000007362801000805046300010563030608046333-02001000007362801000805046300000963090901036304,10.1007/s10854-015-3684-x,10.1007/s10854-009-9913-4,2015-09-01,P6Y3M10D,yes,no -02001000007362801000805046300010563030608046333-02001000106360000030863010009085807025909000307006305,10.1007/s10854-015-3684-x,10.1016/0038-1098(72)90370-5,2015-09-01,P43Y8M,no,no -02001000007362801000805046300010563030608056309-02001000106361937281010370200010437000937000308,10.1007/s10854-015-3685-9,10.1016/j.saa.2014.09.038,2015-09-03,P0Y7M,no,no -02001000007362801000805046300010563030608056309-0200100010636193722102912171027370200010537000437000106,10.1007/s10854-015-3685-9,10.1016/j.matchar.2015.04.016,2015-09-03,P0Y2M,no,no \ No newline at end of file diff --git 
a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz new file mode 100644 index 000000000..7a734f4e2 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 deleted file mode 100644 index 14ee8b354..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 +++ /dev/null @@ -1,8 +0,0 @@ -oci,citing,cited,creation,timespan,journal_sc,author_sc -02001000308362804010509076300010963000003086301-0200100020936020001003227000009010004,10.1038/s41597-019-0038-1,10.1029/2010wr009104,2019-04-15,P8Y1M,no,no -02001000308362804010509076300010963000003086301-0200100010636280103060463080105025800015900000006006303,10.1038/s41597-019-0038-1,10.1016/s1364-8152(01)00060-3,2019-04-15,P17Y3M,no,no -02001000308362804010509076300010963000003086301-02001000007362800000407076300010063000401066333,10.1038/s41597-019-0038-1,10.1007/s00477-010-0416-x,2019-04-15,P8Y9M6D,no,no -02001000308362804010509076300010963000003086301-02001000007362800000700046300010363000905016308,10.1038/s41597-019-0038-1,10.1007/s00704-013-0951-8,2019-04-15,P5Y9M23D,no,no -02001000308362804010509076300010963000003086301-02001000002361924123705070707,10.1038/s41597-019-0038-1,10.1002/joc.5777,2019-04-15,P0Y8M1D,no,no -02001000308362804010509076300010963000003086301-02005010904361714282863020263040504076302000108,10.1038/s41597-019-0038-1,10.5194/hess-22-4547-2018,2019-04-15,P0Y7M18D,no,no 
-02001000308362804010509076300010963000003086301-02001000002361924123703050404,10.1038/s41597-019-0038-1,10.1002/joc.3544,2019-04-15,P6Y9M6D,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz new file mode 100644 index 000000000..b30aa073c Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3 deleted file mode 100644 index 0611929d5..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3 +++ /dev/null @@ -1,9 +0,0 @@ -oci,citing,cited,creation,timespan,journal_sc,author_sc -0200100000236090708010101090307000202023727141528-020050302063600040000010307,10.1002/9781119370222.refs,10.5326/0400137,2020-06-22,P16Y3M,no,no -0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020000073700000301093733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2007.00319.x,2020-06-22,P12Y8M,no,no -0200100000236090708010101090307000202023727141528-0200101010136312830370102030509,10.1002/9781119370222.refs,10.1111/vsu.12359,2020-06-22,P4Y10M29D,no,no -0200100000236090708010101090307000202023727141528-020050302063600030900020904,10.1002/9781119370222.refs,10.5326/0390294,2020-06-22,P17Y1M,no,no -0200100000236090708010101090307000202023727141528-020050302063600040200030701,10.1002/9781119370222.refs,10.5326/0420371,2020-06-22,P13Y9M,no,no 
-0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020001033701020000003733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2013.12000.x,2020-06-22,P7Y2M,no,no -0200100000236090708010101090307000202023727141528-020010008003600000408000106093702000006370306070200,10.1002/9781119370222.refs,10.1080/00480169.2006.36720,2020-06-22,P13Y6M,no,no -0200100000236090708010101090307000202023727141528-0200101010136193701070501630008010337020000063700000003033733,10.1002/9781119370222.refs,10.1111/j.1751-0813.2006.00033.x,2020-06-22,P13Y8M,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz new file mode 100644 index 000000000..43348836a Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz new file mode 100644 index 000000000..ffa2698cd Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz new file mode 100644 index 000000000..2e7a6c85b Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz differ diff --git 
a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala new file mode 100644 index 000000000..ca1dbc665 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -0,0 +1,108 @@ +package eu.dnetlib.dhp.datacite + +import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} +import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest +import eu.dnetlib.dhp.schema.oaf.Oaf +import org.apache.commons.io.FileUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.functions.{col, count} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.extension.ExtendWith +import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} +import org.mockito.junit.jupiter.MockitoExtension +import org.slf4j.{Logger, LoggerFactory} + +import java.nio.file.{Files, Path} +import java.text.SimpleDateFormat +import java.util.Locale +import scala.io.Source + +@ExtendWith(Array(classOf[MockitoExtension])) +class DataciteToOAFTest extends AbstractVocabularyTest { + + private var workingDir: Path = null + val log: Logger = LoggerFactory.getLogger(getClass) + + @BeforeEach + def setUp(): Unit = { + + workingDir = Files.createTempDirectory(getClass.getSimpleName) + super.setUpVocabulary() + } + + @AfterEach + def tearDown(): Unit = { + FileUtils.deleteDirectory(workingDir.toFile) + } + + @Test + def testDateMapping: Unit = { + val inputDate = "2021-07-14T11:52:54+0000" + val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) + val dt = ISO8601FORMAT.parse(inputDate) + println(dt.getTime) + + } + + @Test + def testConvert(): Unit = { + + val path = getClass.getResource("/eu/dnetlib/dhp/actionmanager/datacite/dataset").getPath + + val conf = new SparkConf() + val spark: 
SparkSession = SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master("local[*]") + .getOrCreate() + + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + val instance = new GenerateDataciteDatasetSpark(null, null, log) + val targetPath = s"$workingDir/result" + + instance.generateDataciteDataset(path, exportLinks = true, vocabularies, targetPath, spark) + + import spark.implicits._ + + val nativeSize = spark.read.load(path).count() + + assertEquals(100, nativeSize) + + val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf] + + result + .map(s => s.getClass.getSimpleName) + .groupBy(col("value").alias("class")) + .agg(count("value").alias("Total")) + .show(false) + + val t = spark.read.load(targetPath).count() + + assertTrue(t > 0) + + spark.stop() + + } + + @Test + def testMapping(): Unit = { + val record = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record.json") + ) + .mkString + + val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) + val res: List[Oaf] = DataciteToOAFTransformation.generateOAF(record, 0L, 0L, vocabularies, true) + + res.foreach(r => { + println(mapper.writeValueAsString(r)) + println("----------------------------") + + }) + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala new file mode 100644 index 000000000..ea742a04a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -0,0 +1,202 @@ +package eu.dnetlib.dhp.sx.bio + +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} +import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest +import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved +import 
eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PubMedToOaf} +import org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString} +import org.json4s.jackson.JsonMethods.parse +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.extension.ExtendWith +import org.junit.jupiter.api.{BeforeEach, Test} +import org.mockito.junit.jupiter.MockitoExtension + +import java.io.{BufferedReader, InputStream, InputStreamReader} +import java.util.zip.GZIPInputStream +import scala.collection.JavaConverters._ +import scala.io.Source +import scala.xml.pull.XMLEventReader + +@ExtendWith(Array(classOf[MockitoExtension])) +class BioScholixTest extends AbstractVocabularyTest { + + val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + + @BeforeEach + def setUp(): Unit = { + + super.setUpVocabulary() + } + + class BufferedReaderIterator(reader: BufferedReader) extends Iterator[String] { + override def hasNext() = reader.ready + override def next() = reader.readLine() + } + + object GzFileIterator { + + def apply(is: InputStream, encoding: String) = { + new BufferedReaderIterator( + new BufferedReader(new InputStreamReader(new GZIPInputStream(is), encoding)) + ) + } + } + + @Test + def testEBIData() = { + val inputXML = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")) + .mkString + val xml = new XMLEventReader(Source.fromBytes(inputXML.getBytes())) + new PMParser(xml).foreach(s => println(mapper.writeValueAsString(s))) + } + + @Test + def testPubmedToOaf(): Unit = { + assertNotNull(vocabularies) + assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) + val records: String = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump")) + .mkString + val r: List[Oaf] = records.lines.toList + .map(s => mapper.readValue(s, classOf[PMArticle])) + .map(a 
=> PubMedToOaf.convert(a, vocabularies)) + assertEquals(10, r.size) + assertTrue( + r.map(p => p.asInstanceOf[Result]) + .flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)) + .exists(p => "0037".equalsIgnoreCase(p)) + ) + println(mapper.writeValueAsString(r.head)) + + } + + @Test + def testPDBToOAF(): Unit = { + + assertNotNull(vocabularies) + assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) + val records: String = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pdb_dump")) + .mkString + records.lines.foreach(s => assertTrue(s.nonEmpty)) + + val result: List[Oaf] = records.lines.toList.flatMap(o => BioDBToOAF.pdbTOOaf(o)) + + assertTrue(result.nonEmpty) + result.foreach(r => assertNotNull(r)) + + println(result.count(o => o.isInstanceOf[Relation])) + println(mapper.writeValueAsString(result.head)) + + } + + @Test + def testUNIprotToOAF(): Unit = { + + assertNotNull(vocabularies) + assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) + + val records: String = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump")) + .mkString + records.lines.foreach(s => assertTrue(s.nonEmpty)) + + val result: List[Oaf] = records.lines.toList.flatMap(o => BioDBToOAF.uniprotToOAF(o)) + + assertTrue(result.nonEmpty) + result.foreach(r => assertNotNull(r)) + + println(result.count(o => o.isInstanceOf[Relation])) + println(mapper.writeValueAsString(result.head)) + + } + + case class EBILinks( + relType: String, + date: String, + title: String, + pmid: String, + targetPid: String, + targetPidType: String + ) {} + + def parse_ebi_links(input: String): List[EBILinks] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json = parse(input) + val pmid = (json \ "publication" \ "pmid").extract[String] + for { + JObject(link) <- json \\ "Link" + JField("Target", JObject(target)) <- link + 
JField("RelationshipType", JObject(relType)) <- link + JField("Name", JString(relation)) <- relType + JField("PublicationDate", JString(publicationDate)) <- link + JField("Title", JString(title)) <- target + JField("Identifier", JObject(identifier)) <- target + JField("IDScheme", JString(idScheme)) <- identifier + JField("ID", JString(id)) <- identifier + + } yield EBILinks(relation, publicationDate, title, pmid, id, idScheme) + } + + @Test + def testCrossrefLinksToOAF(): Unit = { + + val records: String = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/crossref_links")) + .mkString + records.lines.foreach(s => assertTrue(s.nonEmpty)) + + val result: List[Oaf] = records.lines.map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList + + assertNotNull(result) + assertTrue(result.nonEmpty) + + println(mapper.writeValueAsString(result.head)) + + } + + @Test + def testEBILinksToOAF(): Unit = { + val iterator = GzFileIterator( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz"), + "UTF-8" + ) + val data = iterator.next() + + val res = BioDBToOAF + .parse_ebi_links(BioDBToOAF.extractEBILinksFromDump(data).links) + .filter(BioDBToOAF.EBITargetLinksFilter) + .flatMap(BioDBToOAF.convertEBILinksToOaf) + print(res.length) + + println(mapper.writeValueAsString(res.head)) + + } + + @Test + def scholixResolvedToOAF(): Unit = { + + val records: String = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved") + ) + .mkString + records.lines.foreach(s => assertTrue(s.nonEmpty)) + + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + + val l: List[ScholixResolved] = records.lines.map { input => + lazy val json = parse(input) + json.extract[ScholixResolved] + }.toList + + val result: List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s)) + + assertTrue(result.nonEmpty) + } + +} diff --git 
a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 7d0d6b0b8..380991526 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -90,7 +90,7 @@ public class ReadBlacklistFromDB implements Closeable { inverse.setSource(target_direct); String encoding = rs.getString("relationship"); - RelationInverse ri = ModelSupport.relationInverseMap.get(encoding); + RelationInverse ri = ModelSupport.findInverse(encoding); direct.setRelClass(ri.getRelClass()); inverse.setRelClass(ri.getInverseRelClass()); direct.setRelType(ri.getRelType()); diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java new file mode 100644 index 000000000..160658e5b --- /dev/null +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.blacklist; + +import java.util.Arrays; +import java.util.List; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.common.RelationInverse; + +public class BlacklistRelationTest { + + @Test + public void testRelationInverseLookup() { + + final List rels = Arrays + .asList( + "resultResult_relationship_IsRelatedTo", + "resultOrganization_affiliation_isAuthorInstitutionOf", + "resultOrganization_affiliation_hasAuthorInstitution", + "datasourceOrganization_provision_isProvidedBy", + "projectOrganization_participation_hasParticipant", + "resultProject_outcome_produces", + "resultProject_outcome_isProducedBy"); + + rels.forEach(r -> { + RelationInverse 
inverse = ModelSupport.relationInverseMap.get(r); + Assertions.assertNotNull(inverse); + Assertions.assertNotNull(inverse.getRelType()); + Assertions.assertNotNull(inverse.getSubReltype()); + Assertions.assertNotNull(inverse.getRelClass()); + }); + + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 45bfc785f..52e9917bb 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; @ExtendWith(MockitoExtension.class) -class UpdateMatcherTest { +public class UpdateMatcherTest { UpdateMatcher matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 550ded9f4..5af81a31a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.broker.objects.OaBrokerMainEntity; -class EnrichMissingPublicationDateTest { +public class EnrichMissingPublicationDateTest { final EnrichMissingPublicationDate matcher = new EnrichMissingPublicationDate(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java 
b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java index b532aa9f7..d93390e4a 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java @@ -8,7 +8,7 @@ import java.util.Arrays; import org.junit.jupiter.api.Test; -class SubscriptionUtilsTest { +public class SubscriptionUtilsTest { @Test void testVerifyListSimilar() { diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index a8bc03e31..117bdeef4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,7 +9,7 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -class TrustUtilsTest { +public class TrustUtilsTest { private static final double THRESHOLD = 0.95; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 6a9b21b00..136413376 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -139,14 +139,28 @@ abstract class AbstractSparkAction implements Serializable { protected boolean isOpenorgs(Relation rel) { return Optional .ofNullable(rel.getCollectedfrom()) - .map( - c -> c - .stream() - .filter(Objects::nonNull) - .anyMatch(kv -> 
ModelConstants.OPENORGS_NAME.equals(kv.getValue()))) + .map(c -> isCollectedFromOpenOrgs(c)) .orElse(false); } + protected boolean isOpenorgsDedupRel(Relation rel) { + return isOpenorgs(rel) && isOpenOrgsDedupMergeRelation(rel); + } + + private boolean isCollectedFromOpenOrgs(List c) { + return c + .stream() + .filter(Objects::nonNull) + .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue())); + } + + private boolean isOpenOrgsDedupMergeRelation(Relation rel) { + return ModelConstants.ORG_ORG_RELTYPE.equals(rel.getRelType()) && + ModelConstants.DEDUP.equals(rel.getSubRelType()) + && (ModelConstants.IS_MERGED_IN.equals(rel.getRelClass()) || + ModelConstants.MERGES.equals(rel.getRelClass())); + } + protected static Boolean parseECField(Field field) { if (field == null) return null; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index 9cc003bf6..62cbb5bff 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -61,7 +61,7 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction { .textFile(relationPath) .map(patchRelFn(), Encoders.bean(Relation.class)) .toJavaRDD() - .filter(x -> !isOpenorgs(x)); + .filter(x -> !isOpenorgsDedupRel(x)); if (log.isDebugEnabled()) { log.debug("Number of non-Openorgs relations collected: {}", simRels.count()); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml index 4ea003926..7c500493f 100644 --- 
a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml @@ -104,7 +104,7 @@ yarn cluster group graph entities - eu.dnetlib.dhp.oa.dedup.GroupEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -138,7 +138,7 @@ yarn cluster Dispatch publications - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -163,7 +163,7 @@ yarn cluster Dispatch project - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -188,7 +188,7 @@ yarn cluster Dispatch organization - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -213,7 +213,7 @@ yarn cluster Dispatch publication - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -238,7 +238,7 @@ yarn cluster Dispatch dataset - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -263,7 +263,7 @@ yarn cluster Dispatch software - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -288,7 +288,7 @@ yarn cluster Dispatch otherresearchproduct - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + 
eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 2349ffebe..2a9f34dee 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -11,6 +11,8 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; +import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -29,6 +31,8 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -226,9 +230,10 @@ public class SparkOpenorgsProvisionTest implements Serializable { new SparkCopyRelationsNoOpenorgs(parser, spark).run(isLookUpService); - long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); + final JavaRDD rels = jsc.textFile(testDedupGraphBasePath + "/relation"); + + assertEquals(2382, rels.count()); - assertEquals(2380, relations); } @Test @@ -250,7 +255,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - assertEquals(4894, relations); + assertEquals(4896, relations); // check deletedbyinference final Dataset mergeRels = spark diff --git 
a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 index 67d491ca2..35d92089d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs/provision/relation/part-00000 @@ -2518,3 +2518,5 @@ {"subRelType": "dedup", "relClass": "isMergedIn", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|openorgs____::5c351d85f02db01ca291acd119f0bd78", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|opendoar____::37248e2f6987b18670dd2b8a51d6ef55", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} {"subRelType": "dedup", "relClass": "merges", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": 
"OpenOrgs Database"}], "validated": false, "properties": []} {"subRelType": "dedup", "relClass": "isMergedIn", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} +{"subRelType": "relationship", "relClass": "IsParentOf", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, "target": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} +{"subRelType": "relationship", "relClass": "IsChildOf", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.990"}, 
"target": "20|corda_______::6acb33e6ea8c6fcdabc891c80d083c64", "lastupdatetimestamp": 1617801137807, "relType": "organizationOrganization", "source": "20|openorgs____::e38c1a27fcb0f0ab218828e4f5fc7be9", "validationDate": null, "collectedfrom": [{"dataInfo": null, "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8", "value": "OpenOrgs Database"}], "validated": false, "properties": []} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala deleted file mode 100644 index 3bfca0859..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala +++ /dev/null @@ -1,87 +0,0 @@ -package eu.dnetlib.doiboost - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset} -import org.apache.commons.io.IOUtils -import org.apache.hadoop.io.Text -import org.apache.hadoop.io.compress.GzipCodec -import org.apache.hadoop.mapred.SequenceFileOutputFormat -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - -object SparkGenerateDOIBoostActionSet { - val logger: Logger = LoggerFactory.getLogger(getClass) - def main(args: Array[String]): Unit = { - - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_as_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication] - implicit val mapEncoderOrg: 
Encoder[Organization] = Encoders.kryo[Organization] - implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset] - implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation] - implicit val mapEncoderAS: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING) - - implicit val mapEncoderAtomiAction: Encoder[AtomicAction[OafDataset]] = Encoders.kryo[AtomicAction[OafDataset]] - - val dbPublicationPath = parser.get("dbPublicationPath") - val dbDatasetPath = parser.get("dbDatasetPath") - val crossRefRelation = parser.get("crossRefRelation") - val dbaffiliationRelationPath = parser.get("dbaffiliationRelationPath") - val dbOrganizationPath = parser.get("dbOrganizationPath") - val sequenceFilePath = parser.get("sFilePath") - - val asDataset = spark.read.load(dbDatasetPath).as[OafDataset] - .filter(p => p != null || p.getId != null) - .map(d =>DoiBoostMappingUtil.fixResult(d)) - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - - - val asPublication =spark.read.load(dbPublicationPath).as[Publication] - .filter(p => p != null || p.getId != null) - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - - - val asOrganization = spark.read.load(dbOrganizationPath).as[Organization] - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - - - - val asCRelation = spark.read.load(crossRefRelation).as[Relation] - .filter(r => r!= null && r.getSource != null && r.getTarget != null) - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - - - val asRelAffiliation = spark.read.load(dbaffiliationRelationPath).as[Relation] - .map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING)) - - - - - - val d: Dataset[(String, String)] = asDataset.union(asPublication).union(asOrganization).union(asCRelation).union(asRelAffiliation) - - - - 
d.rdd.repartition(6000).map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$sequenceFilePath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec]) - - - - - - - - - - } - -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala deleted file mode 100644 index 501073e74..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ /dev/null @@ -1,247 +0,0 @@ -package eu.dnetlib.doiboost - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.merge.AuthorMerger -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset} -import eu.dnetlib.doiboost.mag.ConversionUtil -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ -import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JField, JObject, JString,JArray} -import org.json4s.jackson.JsonMethods.parse - -object SparkGenerateDoiBoost { - - - def extractIdGRID(input:String):List[(String,String)] = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: org.json4s.JValue = parse(input) - - val id:String = (json \ "id").extract[String] - - val grids:List[String] = for { - - JObject(pid) <- json \ "pid" - JField("qualifier", JObject(qualifier)) <- pid - JField("classid", JString(classid)) <-qualifier - JField("value", JString(vl)) <- pid - if classid == "GRID" - } yield vl - grids.map(g => (id, 
s"unresolved::grid::${g.toLowerCase}"))(collection.breakOut) - } - - - - def main(args: Array[String]): Unit = { - - val logger: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - import spark.implicits._ - - val hostedByMapPath = parser.get("hostedByMapPath") - val workingDirPath = parser.get("workingPath") - val openaireOrganizationPath = parser.get("openaireOrganizationPath") - - val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { - override def zero: Publication = new Publication - - override def reduce(b: Publication, a: (String, Publication)): Publication = { - - if (b == null) { - if (a != null && a._2 != null) { - a._2.setId(a._1) - return a._2 - } - } - else { - if (a != null && a._2 != null) { - b.mergeFrom(a._2) - b.setId(a._1) - val authors =AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor) - b.setAuthor(authors) - return b - } - } - new Publication - } - - override def merge(b1: Publication, b2: Publication): Publication = { - if (b1 == null) { - if (b2 != null) - return b2 - } - else { - if (b2 != null ) { - b1.mergeFrom(b2) - val authors =AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) - b1.setAuthor(authors) - if (b2.getId!= null && b2.getId.nonEmpty) - b1.setId(b2.getId) - return b1 - } - } - new Publication - } - - override def finish(reduction: Publication): Publication = reduction - - override def bufferEncoder: Encoder[Publication] = Encoders.kryo[Publication] - - override def outputEncoder: Encoder[Publication] = Encoders.kryo[Publication] - } - - - implicit val mapEncoderPub: Encoder[Publication] = 
Encoders.kryo[Publication] - implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization] - implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset] - implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPub) - implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation] - - logger.info("Phase 2) Join Crossref with UnpayWall") - - val crossrefPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p)) - val uwPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/uwPublication").as[Publication].map(p => (p.getId, p)) - - def applyMerge(item:((String, Publication), (String, Publication))) : Publication = - { - val crossrefPub = item._1._2 - if (item._2!= null) { - val otherPub = item._2._2 - if (otherPub != null) { - crossrefPub.mergeFrom(otherPub) - crossrefPub.setAuthor(AuthorMerger.mergeAuthor(crossrefPub.getAuthor, otherPub.getAuthor)) - } - } - crossrefPub - } - crossrefPublication.joinWith(uwPublication, crossrefPublication("_1").equalTo(uwPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/firstJoin") - logger.info("Phase 3) Join Result with ORCID") - val fj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p)) - val orcidPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p)) - fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/secondJoin") - - logger.info("Phase 4) Join Result with MAG") - val sj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p)) - - val magPublication: Dataset[(String, 
Publication)] = spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p)) - sj.joinWith(magPublication, sj("_1").equalTo(magPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublication") - - - val doiBoostPublication: Dataset[(String,Publication)] = spark.read.load(s"$workingDirPath/doiBoostPublication").as[Publication].filter(p=>DoiBoostMappingUtil.filterPublication(p)).map(DoiBoostMappingUtil.toISSNPair)(tupleForJoinEncoder) - - val hostedByDataset : Dataset[(String, HostedByItemType)] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(DoiBoostMappingUtil.toHostedByItem)) - - - doiBoostPublication.joinWith(hostedByDataset, doiBoostPublication("_1").equalTo(hostedByDataset("_1")), "left") - .map(DoiBoostMappingUtil.fixPublication) - .map(p => (p.getId, p)) - .groupByKey(_._1) - .agg(crossrefAggregator.toColumn) - .map(p => p._2) - .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationFiltered") - - val affiliationPath = parser.get("affiliationPath") - val paperAffiliationPath = parser.get("paperAffiliationPath") - - val affiliation = spark.read.load(affiliationPath).select(col("AffiliationId"), col("GridId"), col("OfficialPage"), col("DisplayName")) - - val paperAffiliation = spark.read.load(paperAffiliationPath).select(col("AffiliationId").alias("affId"), col("PaperId")) - - - val a:Dataset[DoiBoostAffiliation] = paperAffiliation - .joinWith(affiliation, paperAffiliation("affId").equalTo(affiliation("AffiliationId"))) - .select(col("_1.PaperId"), col("_2.AffiliationId"), col("_2.GridId"), col("_2.OfficialPage"), col("_2.DisplayName")).as[DoiBoostAffiliation] - - - - val magPubs:Dataset[(String,Publication)]= spark.read.load(s"$workingDirPath/doiBoostPublicationFiltered").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p))(tupleForJoinEncoder).filter(s =>s._1!= null ) - - - 
magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).flatMap(item => { - val pub:Publication = item._1._2 - val affiliation = item._2 - val affId:String = if (affiliation.GridId.isDefined) s"unresolved::grid::${affiliation.GridId.get.toLowerCase}" else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) - val r:Relation = new Relation - r.setSource(pub.getId) - r.setTarget(affId) - r.setRelType(ModelConstants.RESULT_ORGANIZATION) - r.setRelClass(ModelConstants.HAS_AUTHOR_INSTITUTION) - r.setSubRelType(ModelConstants.AFFILIATION) - r.setDataInfo(pub.getDataInfo) - r.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) - val r1:Relation = new Relation - r1.setTarget(pub.getId) - r1.setSource(affId) - r1.setRelType(ModelConstants.RESULT_ORGANIZATION) - r1.setRelClass(ModelConstants.IS_AUTHOR_INSTITUTION_OF) - r1.setSubRelType(ModelConstants.AFFILIATION) - r1.setDataInfo(pub.getDataInfo) - r1.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) - List(r, r1) - })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") - - - - - val unresolvedRels:Dataset[(String, Relation)] = spark.read.load(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved").as[Relation].map(r => { - - if (r.getSource.startsWith("unresolved")) - (r.getSource, r) - else if (r.getTarget.startsWith("unresolved")) - (r.getTarget,r) - else - ("resolved", r) - })(Encoders.tuple(Encoders.STRING, mapEncoderRel)) - - val openaireOrganization:Dataset[(String,String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x,y) => if (x != null) x else y ).map(_._2) - - unresolvedRels.joinWith(openaireOrganization,unresolvedRels("_1").equalTo(openaireOrganization("_2"))) - .map { x => - val currentRels = x._1._2 - val currentOrgs = x._2 - if (currentOrgs!= null) - 
if(currentRels.getSource.startsWith("unresolved")) - currentRels.setSource(currentOrgs._1) - else - currentRels.setTarget(currentOrgs._1) - currentRels - }.filter(r=> !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") - - magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).map( item => { - val affiliation = item._2 - if (affiliation.GridId.isEmpty) { - val o = new Organization - o.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) - o.setDataInfo(DoiBoostMappingUtil.generateDataInfo()) - o.setId(DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString)) - o.setOriginalId(List(affiliation.AffiliationId.toString).asJava) - if (affiliation.DisplayName.nonEmpty) - o.setLegalname(DoiBoostMappingUtil.asField(affiliation.DisplayName.get)) - if (affiliation.OfficialPage.isDefined) - o.setWebsiteurl(DoiBoostMappingUtil.asField(affiliation.OfficialPage.get)) - o.setCountry(ModelConstants.UNKNOWN_COUNTRY) - o - } - else - null - }).filter(o=> o!=null).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostOrganization") - } - -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala deleted file mode 100644 index 1b1c850ba..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ /dev/null @@ -1,515 +0,0 @@ -package eu.dnetlib.doiboost.crossref - -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf._ -import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} -import eu.dnetlib.dhp.utils.DHPUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _} -import org.apache.commons.lang.StringUtils -import org.json4s -import org.json4s.DefaultFormats -import 
org.json4s.JsonAST.{JValue, _} -import org.json4s.jackson.JsonMethods._ -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ -import scala.collection.mutable -import scala.util.matching.Regex -import java.util - -import eu.dnetlib.doiboost.DoiBoostMappingUtil - -case class CrossrefDT(doi: String, json:String, timestamp: Long) {} - -case class mappingAffiliation(name: String) {} - -case class mappingAuthor(given: Option[String], family: String, sequence:Option[String], ORCID: Option[String], affiliation: Option[mappingAffiliation]) {} - -case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} - - -case object Crossref2Oaf { - val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) - - val mappingCrossrefType = Map( - "book-section" -> "publication", - "book" -> "publication", - "book-chapter" -> "publication", - "book-part" -> "publication", - "book-series" -> "publication", - "book-set" -> "publication", - "book-track" -> "publication", - "edited-book" -> "publication", - "reference-book" -> "publication", - "monograph" -> "publication", - "journal-article" -> "publication", - "dissertation" -> "publication", - "other" -> "publication", - "peer-review" -> "publication", - "proceedings" -> "publication", - "proceedings-article" -> "publication", - "reference-entry" -> "publication", - "report" -> "publication", - "report-series" -> "publication", - "standard" -> "publication", - "standard-series" -> "publication", - "posted-content" -> "publication", - "dataset" -> "dataset" - ) - - - val mappingCrossrefSubType = Map( - "book-section" -> "0013 Part of book or chapter of book", - "book" -> "0002 Book", - "book-chapter" -> "0013 Part of book or chapter of book", - "book-part" -> "0013 Part of book or chapter of book", - "book-series" -> "0002 Book", - "book-set" -> "0002 Book", - "book-track" -> "0002 Book", - "edited-book" -> "0002 Book", - "reference-book" -> "0002 Book", - "monograph" 
-> "0002 Book", - "journal-article" -> "0001 Article", - "dissertation" -> "0044 Thesis", - "other" -> "0038 Other literature type", - "peer-review" -> "0015 Review", - "proceedings" -> "0004 Conference object", - "proceedings-article" -> "0004 Conference object", - "reference-entry" -> "0013 Part of book or chapter of book", - "report" -> "0017 Report", - "report-series" -> "0017 Report", - "standard" -> "0038 Other literature type", - "standard-series" -> "0038 Other literature type", - "dataset" -> "0021 Dataset", - "preprint" -> "0016 Preprint", - "report" -> "0017 Report" - ) - - def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - - //MAPPING Crossref DOI into PID - val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) - result.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava) - - //MAPPING Crossref DOI into OriginalId - //and Other Original Identifier of dataset like clinical-trial-number - val clinicalTrialNumbers = for (JString(ctr) <- json \ "clinical-trial-number") yield ctr - val alternativeIds = for (JString(ids) <- json \ "alternative-id") yield ids - val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi) - - val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava) - result.setOriginalId(originalIds) - - - // Add DataInfo - result.setDataInfo(generateDataInfo()) - - result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long]) - result.setDateofcollection((json \ "indexed" \ "date-time").extract[String]) - - result.setCollectedfrom(List(createCrossrefCollectedFrom()).asJava) - - // Publisher ( Name of work's publisher mapped into Result/Publisher) - val publisher = (json \ "publisher").extractOrElse[String](null) - if (publisher!= null && publisher.nonEmpty) - result.setPublisher(asField(publisher)) - - - // TITLE - val mainTitles = for {JString(title) 
<- json \ "title" if title.nonEmpty} yield createSP(title, "main title", ModelConstants.DNET_DATACITE_TITLE) - val originalTitles = for {JString(title) <- json \ "original-title" if title.nonEmpty} yield createSP(title, "alternative title", ModelConstants.DNET_DATACITE_TITLE) - val shortTitles = for {JString(title) <- json \ "short-title" if title.nonEmpty} yield createSP(title, "alternative title", ModelConstants.DNET_DATACITE_TITLE) - val subtitles = for {JString(title) <- json \ "subtitle" if title.nonEmpty} yield createSP(title, "subtitle", ModelConstants.DNET_DATACITE_TITLE) - result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava) - - // DESCRIPTION - val descriptionList = for {JString(description) <- json \ "abstract"} yield asField(description) - result.setDescription(descriptionList.asJava) - - // Source - val sourceList = for {JString(source) <- json \ "source" if source!= null && source.nonEmpty} yield asField(source) - result.setSource(sourceList.asJava) - - //RELEVANT DATE Mapping - val createdDate = generateDate((json \ "created" \ "date-time").extract[String], (json \ "created" \ "date-parts").extract[List[List[Int]]], "created", ModelConstants.DNET_DATACITE_DATE) - val postedDate = generateDate((json \ "posted" \ "date-time").extractOrElse[String](null), (json \ "posted" \ "date-parts").extract[List[List[Int]]], "available", ModelConstants.DNET_DATACITE_DATE) - val acceptedDate = generateDate((json \ "accepted" \ "date-time").extractOrElse[String](null), (json \ "accepted" \ "date-parts").extract[List[List[Int]]], "accepted", ModelConstants.DNET_DATACITE_DATE) - val publishedPrintDate = generateDate((json \ "published-print" \ "date-time").extractOrElse[String](null), (json \ "published-print" \ "date-parts").extract[List[List[Int]]], "published-print", ModelConstants.DNET_DATACITE_DATE) - val publishedOnlineDate = generateDate((json \ "published-online" \ "date-time").extractOrElse[String](null), (json \ 
"published-online" \ "date-parts").extract[List[List[Int]]], "published-online", ModelConstants.DNET_DATACITE_DATE) - - val issuedDate = extractDate((json \ "issued" \ "date-time").extractOrElse[String](null), (json \ "issued" \ "date-parts").extract[List[List[Int]]]) - if (StringUtils.isNotBlank(issuedDate)) { - result.setDateofacceptance(asField(issuedDate)) - } - else { - result.setDateofacceptance(asField(createdDate.getValue)) - } - result.setRelevantdate(List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate).filter(p => p != null).asJava) - - //Mapping Subject - val subjectList:List[String] = (json \ "subject").extractOrElse[List[String]](List()) - - if (subjectList.nonEmpty) { - result.setSubject(subjectList.map(s=> createSP(s, "keywords", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava) - } - - - - //Mapping Author - val authorList: List[mappingAuthor] = (json \ "author").extractOrElse[List[mappingAuthor]](List()) - - - - val sorted_list = authorList.sortWith((a:mappingAuthor, b:mappingAuthor) => a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")) - - result.setAuthor(sorted_list.zipWithIndex.map{case (a, index) => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)}.asJava) - - // Mapping instance - val instance = new Instance() - val license = for { - JObject(license) <- json \ "license" - JField("URL", JString(lic)) <- license - JField("content-version", JString(content_version)) <- license - } yield (asField(lic), content_version) - val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) - if (l.nonEmpty){ - if (l exists (d => d._2.equals("vor"))){ - for(d <- l){ - if (d._2.equals("vor")){ - instance.setLicense(d._1) - } - } - } - else{ - instance.setLicense(l.head._1)} - } - - // Ticket #6281 added pid to Instance - instance.setPid(result.getPid) - - val has_review = json \ "relation" \"has-review" \ "id" - - if(has_review != JNothing) { - instance.setRefereed( - 
OafMapperUtils.qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS)) - } - - instance.setAccessright(decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)) - instance.setInstancetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) - result.setResourcetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) - - instance.setCollectedfrom(createCrossrefCollectedFrom()) - if (StringUtils.isNotBlank(issuedDate)) { - instance.setDateofacceptance(asField(issuedDate)) - } - else { - instance.setDateofacceptance(asField(createdDate.getValue)) - } - val s: List[String] = List("https://doi.org/" + doi) -// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct -// if (links.nonEmpty) { -// instance.setUrl(links.asJava) -// } - if(s.nonEmpty) - { - instance.setUrl(s.asJava) - } - - result.setInstance(List(instance).asJava) - - //IMPORTANT - //The old method result.setId(generateIdentifier(result, doi)) - //is replaced using IdentifierFactory, but the old identifier - //is preserved among the originalId(s) - val oldId = generateIdentifier(result, doi) - result.setId(oldId) - - val newId = IdentifierFactory.createDOIBoostIdentifier(result) - if (!oldId.equalsIgnoreCase(newId)) { - result.getOriginalId.add(oldId) - } - result.setId(newId) - - if (result.getId == null) - null - else - result - } - - - def generateAuhtor(given: String, family: String, orcid: String, index:Int): Author = { - val a = new Author - a.setName(given) - a.setSurname(family) - a.setFullname(s"$given $family") - a.setRank(index+1) - if (StringUtils.isNotBlank(orcid)) - 
a.setPid(List(createSP(orcid, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES, generateDataInfo())).asJava) - - a - } - - def convert(input: String): List[Oaf] = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: json4s.JValue = parse(input) - - var resultList: List[Oaf] = List() - - - val objectType = (json \ "type").extractOrElse[String](null) - val objectSubType = (json \ "subtype").extractOrElse[String](null) - if (objectType == null) - return resultList - - - val result = generateItemFromType(objectType, objectSubType) - if (result == null) - return List() - val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")) - mappingResult(result, json, cOBJCategory) - if (result == null || result.getId == null) - return List() - - - val funderList: List[mappingFunder] = (json \ "funder").extractOrElse[List[mappingFunder]](List()) - - if (funderList.nonEmpty) { - resultList = resultList ::: mappingFunderToRelations(funderList, result.getId, createCrossrefCollectedFrom(), result.getDataInfo, result.getLastupdatetimestamp) - } - - - result match { - case publication: Publication => convertPublication(publication, json, cOBJCategory) - case dataset: Dataset => convertDataset(dataset) - } - - resultList = resultList ::: List(result) - resultList - } - - - def mappingFunderToRelations(funders: List[mappingFunder], sourceId: String, cf: KeyValue, di: DataInfo, ts: Long): List[Relation] = { - - val queue = new mutable.Queue[Relation] - - - def snsfRule(award:String): String = { - val tmp1 = StringUtils.substringAfter(award,"_") - val tmp2 = StringUtils.substringBefore(tmp1,"/") - logger.debug(s"From $award to $tmp2") - tmp2 - - - } - - - def extractECAward(award: String): String = { - val awardECRegex: Regex = "[0-9]{4,9}".r - if (awardECRegex.findAllIn(award).hasNext) - return awardECRegex.findAllIn(award).max - null - } - - - def 
generateRelation(sourceId:String, targetId:String, relClass:String) :Relation = { - - val r = new Relation - r.setSource(sourceId) - r.setTarget(targetId) - r.setRelType(ModelConstants.RESULT_PROJECT) - r.setRelClass(relClass) - r.setSubRelType(ModelConstants.OUTCOME) - r.setCollectedfrom(List(cf).asJava) - r.setDataInfo(di) - r.setLastupdatetimestamp(ts) - r - - - } - - - def generateSimpleRelationFromAward(funder: mappingFunder, nsPrefix: String, extractField: String => String): Unit = { - if (funder.award.isDefined && funder.award.get.nonEmpty) - funder.award.get.map(extractField).filter(a => a!= null && a.nonEmpty).foreach( - award => { - val targetId = getProjectId(nsPrefix, DHPUtils.md5(award)) - queue += generateRelation(sourceId, targetId , ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId , sourceId, ModelConstants.PRODUCES) - } - ) - } - - def getProjectId (nsPrefix:String, targetId:String):String = { - s"40|$nsPrefix::$targetId" - } - - - if (funders != null) - funders.foreach(funder => { - if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { - funder.DOI.get match { - case "10.13039/100010663" | - "10.13039/100010661" | - "10.13039/501100007601" | - "10.13039/501100000780" | - "10.13039/100010665" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "10.13039/100011199" | - "10.13039/100004431" | - "10.13039/501100004963" | - "10.13039/501100000780" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - case "10.13039/501100000781" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a) - case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a) - case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a) - case 
"10.13039/501100001602" => generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", "")) - case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a) - case "10.13039/501100000038"=> val targetId = getProjectId("nserc_______" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) - case "10.13039/501100000155"=> val targetId = getProjectId("sshrc_______" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId,sourceId, ModelConstants.PRODUCES) - case "10.13039/501100000024"=> val targetId = getProjectId("cihr________" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId,sourceId, ModelConstants.PRODUCES) - case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a) - case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) - case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a=>a) - case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) - case "10.13039/501100003407" => generateSimpleRelationFromAward(funder, "miur________", a=>a) - val targetId = getProjectId("miur________" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId,sourceId, ModelConstants.PRODUCES) - case "10.13039/501100006588" | - "10.13039/501100004488" => generateSimpleRelationFromAward(funder, "irb_hr______", a=>a.replaceAll("Project No.", "").replaceAll("HRZZ-","") ) - case "10.13039/501100006769"=> generateSimpleRelationFromAward(funder, 
"rsf_________", a=>a) - case "10.13039/501100001711"=> generateSimpleRelationFromAward(funder, "snsf________", snsfRule) - case "10.13039/501100004410"=> generateSimpleRelationFromAward(funder, "tubitakf____", a =>a) - case "10.10.13039/100004440"=> generateSimpleRelationFromAward(funder, "wt__________", a =>a) - case "10.13039/100004440"=> val targetId = getProjectId("wt__________" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, ModelConstants.IS_PRODUCED_BY) - queue += generateRelation(targetId,sourceId, ModelConstants.PRODUCES) - - case _ => logger.debug("no match for "+funder.DOI.get ) - - - } - - - } else { - funder.name match { - case "European Union’s Horizon 2020 research and innovation program" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "European Union's" => - generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - case "The French National Research Agency (ANR)" | - "The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a) - case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) - case "Wellcome Trust Masters Fellowship" => val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY ) - queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES ) - case _ => logger.debug("no match for "+funder.name ) - - } - } - - } - ) - queue.toList - } - - def convertDataset(dataset: Dataset): Unit = { - // TODO check if there are other info to map into the Dataset - } - - - def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - val containerTitles = 
for {JString(ct) <- json \ "container-title"} yield ct - - - //Mapping book - if (cobjCategory.toLowerCase.contains("book")) { - val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn - if (ISBN.nonEmpty && containerTitles.nonEmpty) { - val source = s"${containerTitles.head} ISBN: ${ISBN.head}" - if (publication.getSource != null) { - val l: List[Field[String]] = publication.getSource.asScala.toList - val ll: List[Field[String]] = l ::: List(asField(source)) - publication.setSource(ll.asJava) - } - else - publication.setSource(List(asField(source)).asJava) - } - } else { - // Mapping Journal - - val issnInfos = for {JArray(issn_types) <- json \ "issn-type" - JObject(issn_type) <- issn_types - JField("type", JString(tp)) <- issn_type - JField("value", JString(vl)) <- issn_type - } yield Tuple2(tp, vl) - - val volume = (json \ "volume").extractOrElse[String](null) - if (containerTitles.nonEmpty) { - val journal = new Journal - journal.setName(containerTitles.head) - if (issnInfos.nonEmpty) { - - issnInfos.foreach(tp => { - tp._1 match { - case "electronic" => journal.setIssnOnline(tp._2) - case "print" => journal.setIssnPrinted(tp._2) - } - }) - } - journal.setVol(volume) - val page = (json \ "page").extractOrElse[String](null) - if (page != null) { - val pp = page.split("-") - if (pp.nonEmpty) - journal.setSp(pp.head) - if (pp.size > 1) - journal.setEp(pp(1)) - } - publication.setJournal(journal) - } - } - } - - def extractDate(dt: String, datePart: List[List[Int]]): String = { - if (StringUtils.isNotBlank(dt)) - return dt - if (datePart != null && datePart.size == 1) { - val res = datePart.head - if (res.size == 3) { - val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d" - if (dp.length == 10) { - return dp - } - } - } - null - - } - - def generateDate(dt: String, datePart: List[List[Int]], classId: String, schemeId: String): StructuredProperty = { - val dp = extractDate(dt, datePart) - if (StringUtils.isNotBlank(dp)) - return createSP(dp, classId, schemeId) - 
null - } - - def generateItemFromType(objectType: String, objectSubType: String): Result = { - if (mappingCrossrefType.contains(objectType)) { - if (mappingCrossrefType(objectType).equalsIgnoreCase("publication")) - return new Publication() - if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset")) - return new Dataset() - } - null - } - -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala deleted file mode 100644 index a68d0bb2d..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ /dev/null @@ -1,94 +0,0 @@ -package eu.dnetlib.doiboost.mag - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.sql.{SaveMode, SparkSession} -import org.apache.spark.sql.types._ -import org.slf4j.{Logger, LoggerFactory} - -object SparkImportMagIntoDataset { - val datatypedict = Map( - "bool" -> BooleanType, - "int" -> IntegerType, - "uint" -> IntegerType, - "long" -> LongType, - "ulong" -> LongType, - "float" -> FloatType, - "string" -> StringType, - "DateTime" -> DateType - ) - - - val stream = Map( - "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), - "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")), - "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", 
"PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), - "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")), - "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")), - "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")), - "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")), - "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", 
"IndexedAbstract:string")), - "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")), - "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")), - "PaperExtendedAttributes" -> Tuple2("mag/PaperExtendedAttributes.txt", Seq("PaperId:long", "AttributeType:int", "AttributeValue:string")), - "PaperFieldsOfStudy" -> Tuple2("advanced/PaperFieldsOfStudy.txt", Seq("PaperId:long", "FieldOfStudyId:long", "Score:float")), - "PaperMeSH" -> Tuple2("advanced/PaperMeSH.txt", Seq("PaperId:long", "DescriptorUI:string", "DescriptorName:string", "QualifierUI:string", "QualifierName:string", "IsMajorTopic:bool")), - "PaperRecommendations" -> Tuple2("advanced/PaperRecommendations.txt", Seq("PaperId:long", "RecommendedPaperId:long", "Score:float")), - "PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")), - "PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")), - "PaperUrls" -> Tuple2("mag/PaperUrls.txt", Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")), - "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "OnlineDate:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "FamilyRank:uint?", "CreatedDate:DateTime")), - "RelatedFieldOfStudy" -> Tuple2("advanced/RelatedFieldOfStudy.txt", 
Seq("FieldOfStudyId1:long", "Type1:string", "FieldOfStudyId2:long", "Type2:string", "Rank:float")) - ) - - - def getSchema(streamName: String): StructType = { - var schema = new StructType() - val d: Seq[String] = stream(streamName)._2 - d.foreach { case t => - val currentType = t.split(":") - val fieldName: String = currentType.head - var fieldType: String = currentType.last - val nullable: Boolean = fieldType.endsWith("?") - if (nullable) - fieldType = fieldType.replace("?", "") - schema = schema.add(StructField(fieldName, datatypedict(fieldType), nullable)) - } - schema - } - - - def main(args: Array[String]): Unit = { - val logger: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/convert_mag_to_oaf_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - - stream.foreach { case (k, v) => - val s: StructType = getSchema(k) - val df = spark.read - .option("header", "false") - .option("charset", "UTF8") - .option("delimiter", "\t") - .schema(s) - .csv(s"${parser.get("sourcePath")}/${v._1}") - logger.info(s"Converting $k") - - df.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/$k") - } - - } - -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala deleted file mode 100644 index 016279787..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ /dev/null @@ -1,158 +0,0 @@ -package eu.dnetlib.doiboost.mag - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Publication -import eu.dnetlib.doiboost.DoiBoostMappingUtil -import 
org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ -import org.apache.spark.sql._ -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ - -object SparkProcessMAG { - - def getDistinctResults (d:Dataset[MagPapers]):Dataset[MagPapers]={ - d.where(col("Doi").isNotNull) - .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) - .reduceGroups((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2)) - .map(_._2)(Encoders.product[MagPapers]) - .map(mp => { - new MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi), - mp.DocType, mp.PaperTitle, mp.OriginalTitle, - mp.BookTitle, mp.Year, mp.Date, mp.Publisher: String, - mp.JournalId, mp.ConferenceSeriesId, mp.ConferenceInstanceId, - mp.Volume, mp.Issue, mp.FirstPage, mp.LastPage, - mp.ReferenceCount, mp.CitationCount, mp.EstimatedCitation, - mp.OriginalVenue, mp.FamilyId, mp.CreatedDate) - })(Encoders.product[MagPapers]) - } - - def main(args: Array[String]): Unit = { - - val logger: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - val sourcePath = parser.get("sourcePath") - val workingPath = parser.get("workingPath") - val targetPath = parser.get("targetPath") - - import spark.implicits._ - implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication] - implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs) - - logger.info("Phase 1) make uninue DOI in Papers:") - val d: 
Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers] - - // Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one - val distinctPaper: Dataset[MagPapers] = getDistinctResults(d) - - distinctPaper.write.mode(SaveMode.Overwrite).save(s"$workingPath/Papers_distinct") - - logger.info("Phase 0) Enrich Publication with description") - val pa = spark.read.load(s"$sourcePath/PaperAbstractsInvertedIndex").as[MagPaperAbstract] - pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"$workingPath/PaperAbstract") - - logger.info("Phase 3) Group Author by PaperId") - val authors = spark.read.load(s"$sourcePath/Authors").as[MagAuthor] - - val affiliation = spark.read.load(s"$sourcePath/Affiliations").as[MagAffiliation] - val paperAuthorAffiliation = spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation] - - paperAuthorAffiliation.joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId"))) - .map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null, a.AuthorSequenceNumber)) } - .joinWith(affiliation, affiliation("AffiliationId").equalTo(col("_1")), "left") - .map(s => { - val mpa = s._1._2 - val af = s._2 - if (af != null) { - MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName, mpa.sequenceNumber) - } else - mpa - }).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation", $"sequenceNumber")).as("authors")) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_1_paper_authors") - - logger.info("Phase 4) create First Version of publication Entity with Paper Journal and Authors") - - val journals = spark.read.load(s"$sourcePath/Journals").as[MagJournal] - - val papers = spark.read.load((s"$workingPath/Papers_distinct")).as[MagPapers] - - val paperWithAuthors = 
spark.read.load(s"$workingPath/merge_step_1_paper_authors").as[MagPaperWithAuthorList] - - val firstJoin = papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")), "left") - firstJoin.joinWith(paperWithAuthors, firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), "left") - .map { a => ConversionUtil.createOAFFromJournalAuthorPaper(a) } - .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_2") - - - var magPubs: Dataset[(String, Publication)] = - spark.read.load(s"$workingPath/merge_step_2").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] - - - val conference = spark.read.load(s"$sourcePath/ConferenceInstances") - .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate",$"EndDate" ) - val conferenceInstance = conference.joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci"))) - .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate",$"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance] - - - magPubs.joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left") - .map(item => ConversionUtil.updatePubsWithConferenceInfo(item)) - .write - .mode(SaveMode.Overwrite) - .save(s"$workingPath/merge_step_3") - - - val paperAbstract = spark.read.load((s"$workingPath/PaperAbstract")).as[MagPaperAbstract] - - - magPubs = spark.read.load(s"$workingPath/merge_step_3").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] - - magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") - .map(item => ConversionUtil.updatePubsWithDescription(item) - ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") - - - logger.info("Phase 7) Enrich Publication with FieldOfStudy") - - magPubs = spark.read.load(s"$workingPath/merge_step_4").as[Publication] - .map(p => 
(ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] - - val fos = spark.read.load(s"$sourcePath/FieldsOfStudy").select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType") - - val pfos = spark.read.load(s"$sourcePath/PaperFieldsOfStudy") - - val paperField = pfos.joinWith(fos, fos("fos").equalTo(pfos("FieldOfStudyId"))) - .select($"_1.FieldOfStudyId", $"_2.DisplayName", $"_2.MainType", $"_1.PaperId", $"_1.Score") - .groupBy($"PaperId").agg(collect_list(struct($"FieldOfStudyId", $"DisplayName", $"MainType", $"Score")).as("subjects")) - .as[MagFieldOfStudy] - - magPubs.joinWith(paperField, col("_1") - .equalTo(paperField("PaperId")), "left") - .map(item => ConversionUtil.updatePubsWithSubject(item)) - .write.mode(SaveMode.Overwrite) - .save(s"$workingPath/mag_publication") - - spark.read.load(s"$workingPath/mag_publication").as[Publication] - .filter(p => p.getId == null) - .groupByKey(p => p.getId) - .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) - .map(_._2) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") - - - - } -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala deleted file mode 100644 index 31f331912..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala +++ /dev/null @@ -1,70 +0,0 @@ -package eu.dnetlib.doiboost.orcid - -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.merge.AuthorMerger -import eu.dnetlib.dhp.schema.oaf.Publication -import eu.dnetlib.dhp.schema.orcid.OrcidDOI -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ -import 
org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - -object SparkPreprocessORCID { - val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - - def fixORCIDItem(item :ORCIDItem):ORCIDItem = { - ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) - - } - - - def run(spark:SparkSession,sourcePath:String,workingPath:String):Unit = { - import spark.implicits._ - implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - - val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) - - spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") - - val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) - - spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") - - val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] - - val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] - - works.joinWith(authors, authors("oid").equalTo(works("oid"))) - .map(i =>{ - val doi = i._1.doi - val author = i._2 - (doi, author) - }).groupBy(col("_1").alias("doi")) - .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] - .map(s => fixORCIDItem(s)) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") - } - - def main(args: Array[String]): Unit = { - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - 
.appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - val sourcePath = parser.get("sourcePath") - val workingPath = parser.get("workingPath") - - run(spark, sourcePath, workingPath) - - } - -} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala similarity index 55% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index efd5d2497..20471973a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -1,27 +1,37 @@ package eu.dnetlib.doiboost -import java.time.LocalDate -import java.time.format.DateTimeFormatter - +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, OpenAccessRoute, Organization, Publication, Qualifier, Relation, Result, StructuredProperty} +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.lang3.StringUtils -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil.{getClosedAccessQualifier, getEmbargoedAccessQualifier, getUnknownQualifier} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} +import java.time.LocalDate +import 
java.time.format.DateTimeFormatter import scala.collection.JavaConverters._ +case class HostedByItemType( + id: String, + officialname: String, + issn: String, + eissn: String, + lissn: String, + openAccess: Boolean +) {} -case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} - -case class DoiBoostAffiliation(PaperId:Long, AffiliationId:Long, GridId:Option[String], OfficialPage:Option[String], DisplayName:Option[String]){} +case class DoiBoostAffiliation( + PaperId: Long, + AffiliationId: Long, + GridId: Option[String], + OfficialPage: Option[String], + DisplayName: Option[String] +) {} object DoiBoostMappingUtil { @@ -45,9 +55,19 @@ object DoiBoostMappingUtil { val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)" val DOI_PREFIX = "10." - val invalidName = List(",", "none none", "none, none", "none &na;", "(:null)", "test test test", "test test", "test", "&na; &na;") + val invalidName = List( + ",", + "none none", + "none, none", + "none &na;", + "(:null)", + "test test test", + "test test", + "test", + "&na; &na;" + ) - def toActionSet(item:Oaf) :(String, String) = { + def toActionSet(item: Oaf): (String, String) = { val mapper = new ObjectMapper() item match { @@ -77,59 +97,56 @@ object DoiBoostMappingUtil { } - - def toHostedByItem(input:String): (String, HostedByItemType) = { + def toHostedByItem(input: String): (String, HostedByItemType) = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + val c: Map[String, HostedByItemType] = json.extract[Map[String, HostedByItemType]] (c.keys.head, c.values.head) } - - def toISSNPair(publication: Publication) : (String, Publication) = { + def toISSNPair(publication: Publication): (String, Publication) = { val issn = if (publication.getJournal == null) null else publication.getJournal.getIssnPrinted - val 
eissn =if (publication.getJournal == null) null else publication.getJournal.getIssnOnline - val lissn =if (publication.getJournal == null) null else publication.getJournal.getIssnLinking + val eissn = if (publication.getJournal == null) null else publication.getJournal.getIssnOnline + val lissn = if (publication.getJournal == null) null else publication.getJournal.getIssnLinking - if (issn!= null && issn.nonEmpty) + if (issn != null && issn.nonEmpty) (issn, publication) - else if(eissn!= null && eissn.nonEmpty) + else if (eissn != null && eissn.nonEmpty) (eissn, publication) - else if(lissn!= null && lissn.nonEmpty) + else if (lissn != null && lissn.nonEmpty) (lissn, publication) else (publication.getId, publication) } - - - - def generateGridAffiliationId(gridId:String) :String = { + def generateGridAffiliationId(gridId: String): String = { s"20|grid________::${DHPUtils.md5(gridId.toLowerCase().trim())}" } - - def fixResult(result: Dataset) :Dataset = { + def fixResult(result: Dataset): Dataset = { val instanceType = extractInstance(result) if (instanceType.isDefined) { result.getInstance().asScala.foreach(i => i.setInstancetype(instanceType.get.getInstancetype)) } - result.getInstance().asScala.foreach(i => { - i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY) - }) + result + .getInstance() + .asScala + .foreach(i => { + i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY) + }) result } - - def decideAccessRight(lic : Field[String], date:String) : AccessRight = { - if(lic == null){ + def decideAccessRight(lic: Field[String], date: String): AccessRight = { + if (lic == null) { //Default value Unknown return getUnknownQualifier() } - val license : String = lic.getValue + val license: String = lic.getValue //CC licenses - if(license.startsWith("cc") || + if ( + license.startsWith("cc") || license.startsWith("http://creativecommons.org/licenses") || license.startsWith("https://creativecommons.org/licenses") || @@ -139,40 +156,44 @@ object DoiBoostMappingUtil { 
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") || //APA (considered OPEN also by Unpaywall) - license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")){ + license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx") + ) { - val oaq : AccessRight = getOpenAccessQualifier() + val oaq: AccessRight = getOpenAccessQualifier() oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) return oaq } //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) - if(license.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")){ + if ( + license.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) { val now = java.time.LocalDate.now - try{ + try { val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd")) - if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ - val oaq : AccessRight = getOpenAccessQualifier() + if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) { + val oaq: AccessRight = getOpenAccessQualifier() oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) return oaq - } - else{ + } else { return getEmbargoedAccessQualifier() } - }catch { + } catch { case e: Exception => { - try{ - val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")) - if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ - val oaq : AccessRight = getOpenAccessQualifier() - oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) - return oaq - } - else{ - return getEmbargoedAccessQualifier() - } - }catch{ + try { + val pub_date = + LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")) + if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) { + val oaq: AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } else { + return 
getEmbargoedAccessQualifier() + } + } catch { case ex: Exception => return getClosedAccessQualifier() } } @@ -185,64 +206,91 @@ object DoiBoostMappingUtil { } + def getOpenAccessQualifier(): AccessRight = { - - def getOpenAccessQualifier():AccessRight = { - - OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN,"Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + OafMapperUtils.accessRight( + ModelConstants.ACCESS_RIGHT_OPEN, + "Open Access", + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - def getRestrictedQualifier():AccessRight = { - OafMapperUtils.accessRight( "RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + def getRestrictedQualifier(): AccessRight = { + OafMapperUtils.accessRight( + "RESTRICTED", + "Restricted", + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - - def getUnknownQualifier():AccessRight = { - OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + def getUnknownQualifier(): AccessRight = { + OafMapperUtils.accessRight( + ModelConstants.UNKNOWN, + ModelConstants.NOT_AVAILABLE, + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - - def getEmbargoedAccessQualifier():AccessRight = { - OafMapperUtils.accessRight("EMBARGO","Embargo",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + def getEmbargoedAccessQualifier(): AccessRight = { + OafMapperUtils.accessRight( + "EMBARGO", + "Embargo", + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - def getClosedAccessQualifier():AccessRight = { - OafMapperUtils.accessRight("CLOSED","Closed Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + def getClosedAccessQualifier(): AccessRight = { + OafMapperUtils.accessRight( + "CLOSED", + "Closed Access", + 
ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) } - - def extractInstance(r:Result):Option[Instance] = { - r.getInstance().asScala.find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty) + def extractInstance(r: Result): Option[Instance] = { + r.getInstance() + .asScala + .find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty) } - def fixPublication(input:((String,Publication), (String,HostedByItemType))): Publication = { + def fixPublication(input: ((String, Publication), (String, HostedByItemType))): Publication = { val publication = input._1._2 val item = if (input._2 != null) input._2._2 else null - val instanceType:Option[Instance] = extractInstance(publication) + val instanceType: Option[Instance] = extractInstance(publication) if (instanceType.isDefined) { - publication.getInstance().asScala.foreach(i => i.setInstancetype(instanceType.get.getInstancetype)) + publication + .getInstance() + .asScala + .foreach(i => i.setInstancetype(instanceType.get.getInstancetype)) } - publication.getInstance().asScala.foreach(i => { - var hb = new KeyValue - if (item != null) { - hb.setValue(item.officialname) - hb.setKey(item.id) - if (item.openAccess) { - i.setAccessright(getOpenAccessQualifier()) - i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) - } + publication + .getInstance() + .asScala + .foreach(i => { + var hb = new KeyValue + if (item != null) { + hb.setValue(item.officialname) + hb.setKey(item.id) + if (item.openAccess) { + i.setAccessright(getOpenAccessQualifier()) + i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) + } - } - else { - hb = ModelConstants.UNKNOWN_REPOSITORY - } - i.setHostedby(hb) - }) + } else { + hb = ModelConstants.UNKNOWN_REPOSITORY + } + i.setHostedby(hb) + }) publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance())) @@ -272,17 +320,22 @@ object DoiBoostMappingUtil { if (publication.getTitle == null || 
publication.getTitle.size == 0) return false - - val s = publication.getTitle.asScala.count(p => p.getValue != null - && p.getValue.nonEmpty && !p.getValue.equalsIgnoreCase("[NO TITLE AVAILABLE]")) + val s = publication.getTitle.asScala.count(p => + p.getValue != null + && p.getValue.nonEmpty && !p.getValue.equalsIgnoreCase("[NO TITLE AVAILABLE]") + ) if (s == 0) return false // fixes #4360 (test publisher) - val publisher = if (publication.getPublisher != null) publication.getPublisher.getValue else null + val publisher = + if (publication.getPublisher != null) publication.getPublisher.getValue else null - if (publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher.equalsIgnoreCase("CrossRef Test Account"))) { + if ( + publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher + .equalsIgnoreCase("CrossRef Test Account")) + ) { return false; } @@ -290,18 +343,12 @@ object DoiBoostMappingUtil { if (publication.getAuthor == null || publication.getAuthor.size() == 0) return false - //filter invalid author val authors = publication.getAuthor.asScala.map(s => { if (s.getFullname.nonEmpty) { s.getFullname - } - else - s"${ - s.getName - } ${ - s.getSurname - }" + } else + s"${s.getName} ${s.getSurname}" }) val c = authors.count(isValidAuthorName) @@ -309,13 +356,16 @@ object DoiBoostMappingUtil { return false // fixes #4368 - if (authors.count(s => s.equalsIgnoreCase("Addie Jackson")) > 0 && "Elsevier BV".equalsIgnoreCase(publication.getPublisher.getValue)) + if ( + authors.count(s => s.equalsIgnoreCase("Addie Jackson")) > 0 && "Elsevier BV".equalsIgnoreCase( + publication.getPublisher.getValue + ) + ) return false true } - def isValidAuthorName(fullName: String): Boolean = { if (fullName == null || fullName.isEmpty) return false @@ -324,32 +374,47 @@ object DoiBoostMappingUtil { true } - def generateDataInfo(trust: String): DataInfo = { val di = new DataInfo di.setDeletedbyinference(false) di.setInferred(false) 
di.setInvisible(false) di.setTrust(trust) - di.setProvenanceaction(OafMapperUtils.qualifier(ModelConstants.SYSIMPORT_ACTIONSET,ModelConstants.SYSIMPORT_ACTIONSET, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS)) + di.setProvenanceaction( + OafMapperUtils.qualifier( + ModelConstants.SYSIMPORT_ACTIONSET, + ModelConstants.SYSIMPORT_ACTIONSET, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS + ) + ) di } - - - def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String): StructuredProperty = { + def createSP( + value: String, + classId: String, + className: String, + schemeId: String, + schemeName: String + ): StructuredProperty = { val sp = new StructuredProperty - sp.setQualifier(OafMapperUtils.qualifier(classId,className, schemeId, schemeName)) + sp.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId, schemeName)) sp.setValue(value) sp } - - - def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String, dataInfo: DataInfo): StructuredProperty = { + def createSP( + value: String, + classId: String, + className: String, + schemeId: String, + schemeName: String, + dataInfo: DataInfo + ): StructuredProperty = { val sp = new StructuredProperty - sp.setQualifier(OafMapperUtils.qualifier(classId,className, schemeId, schemeName)) + sp.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId, schemeName)) sp.setValue(value) sp.setDataInfo(dataInfo) sp @@ -358,17 +423,20 @@ object DoiBoostMappingUtil { def createSP(value: String, classId: String, schemeId: String): StructuredProperty = { val sp = new StructuredProperty - sp.setQualifier(OafMapperUtils.qualifier(classId,classId, schemeId, schemeId)) + sp.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId, schemeId)) sp.setValue(value) sp } - - - def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = 
{ + def createSP( + value: String, + classId: String, + schemeId: String, + dataInfo: DataInfo + ): StructuredProperty = { val sp = new StructuredProperty - sp.setQualifier(OafMapperUtils.qualifier(classId,classId, schemeId, schemeId)) + sp.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId, schemeId)) sp.setValue(value) sp.setDataInfo(dataInfo) sp @@ -384,7 +452,6 @@ object DoiBoostMappingUtil { } - def createUnpayWallCollectedFrom(): KeyValue = { val cf = new KeyValue @@ -403,15 +470,11 @@ object DoiBoostMappingUtil { } - - def generateIdentifier (oaf: Result, doi: String): String = { - val id = DHPUtils.md5 (doi.toLowerCase) + def generateIdentifier(oaf: Result, doi: String): String = { + val id = DHPUtils.md5(doi.toLowerCase) s"50|${doiBoostNSPREFIX}${SEPARATOR}${id}" } - - - def createMAGCollectedFrom(): KeyValue = { val cf = new KeyValue @@ -426,19 +489,21 @@ object DoiBoostMappingUtil { tmp.setValue(value) tmp - } def isEmpty(x: String) = x == null || x.trim.isEmpty - def normalizeDoi(input : String) :String ={ - if(input == null) + def normalizeDoi(input: String): String = { + if (input == null) return null - val replaced = input.replaceAll("(?:\\n|\\r|\\t|\\s)", "").toLowerCase.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX) - if (isEmpty(replaced)) + val replaced = input + .replaceAll("(?:\\n|\\r|\\t|\\s)", "") + .toLowerCase + .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX) + if (isEmpty(replaced)) return null - if(replaced.indexOf("10.") < 0) + if (replaced.indexOf("10.") < 0) return null val ret = replaced.substring(replaced.indexOf("10.")) @@ -448,9 +513,6 @@ object DoiBoostMappingUtil { return ret - } - - } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala new file mode 100644 index 000000000..b6152526d --- /dev/null +++ 
b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDOIBoostActionSet.scala
@@ -0,0 +1,114 @@
+package eu.dnetlib.doiboost
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.schema.action.AtomicAction
+import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset}
+import org.apache.commons.io.IOUtils
+import org.apache.hadoop.io.Text
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.hadoop.mapred.SequenceFileOutputFormat
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+/** Spark job that converts the DOIBoost datasets, publications, organizations and
+  * relations into (key, value) string pairs via `DoiBoostMappingUtil.toActionSet`
+  * and stores their union as a gzip-compressed Hadoop SequenceFile.
+  */
+object SparkGenerateDOIBoostActionSet {
+  val logger: Logger = LoggerFactory.getLogger(getClass)
+
+  /** Job entry point.
+    *
+    * @param args command-line arguments, parsed according to the bundled
+    *             `generate_doiboost_as_params.json` definition
+    */
+  def main(args: Array[String]): Unit = {
+
+    val conf: SparkConf = new SparkConf()
+    val parser = new ArgumentApplicationParser(
+      IOUtils.toString(
+        getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_as_params.json")
+      )
+    )
+    parser.parseArgument(args)
+    val spark: SparkSession =
+      SparkSession
+        .builder()
+        .config(conf)
+        .appName(getClass.getSimpleName)
+        .master(parser.get("master"))
+        .getOrCreate()
+
+    implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
+    implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization]
+    implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
+    implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
+    // Encoder of the (key, value) pairs produced by toActionSet; passed explicitly below.
+    implicit val mapEncoderAS: Encoder[(String, String)] =
+      Encoders.tuple(Encoders.STRING, Encoders.STRING)
+
+    // NOTE(review): this encoder looks unused in this method; confirm before removing.
+    implicit val mapEncoderAtomiAction: Encoder[AtomicAction[OafDataset]] =
+      Encoders.kryo[AtomicAction[OafDataset]]
+
+    val dbPublicationPath = parser.get("dbPublicationPath")
+    val dbDatasetPath = parser.get("dbDatasetPath")
+    val crossRefRelation = parser.get("crossRefRelation")
+    val dbaffiliationRelationPath = parser.get("dbaffiliationRelationPath")
+    val dbOrganizationPath = parser.get("dbOrganizationPath")
+    val sequenceFilePath = parser.get("sFilePath")
+
+    val asDataset = spark.read
+      .load(dbDatasetPath)
+      .as[OafDataset]
+      // Both checks must hold: with `||` a null record was dereferenced (NPE)
+      // and a record without an id was still let through.
+      .filter(p => p != null && p.getId != null)
+      .map(d => DoiBoostMappingUtil.fixResult(d))
+      .map(d => DoiBoostMappingUtil.toActionSet(d))(mapEncoderAS)
+
+    val asPublication = spark.read
+      .load(dbPublicationPath)
+      .as[Publication]
+      // Same null-safe guard as for the datasets above.
+      .filter(p => p != null && p.getId != null)
+      .map(d => DoiBoostMappingUtil.toActionSet(d))(mapEncoderAS)
+
+    val asOrganization = spark.read
+      .load(dbOrganizationPath)
+      .as[Organization]
+      .map(d => DoiBoostMappingUtil.toActionSet(d))(mapEncoderAS)
+
+    val asCRelation = spark.read
+      .load(crossRefRelation)
+      .as[Relation]
+      .filter(r => r != null && r.getSource != null && r.getTarget != null)
+      .map(d => DoiBoostMappingUtil.toActionSet(d))(mapEncoderAS)
+
+    val asRelAffiliation = spark.read
+      .load(dbaffiliationRelationPath)
+      .as[Relation]
+      .map(d => DoiBoostMappingUtil.toActionSet(d))(mapEncoderAS)
+
+    val d: Dataset[(String, String)] = asDataset
+      .union(asPublication)
+      .union(asOrganization)
+      .union(asCRelation)
+      .union(asRelAffiliation)
+
+    // Write the union as a gzip-compressed SequenceFile of (Text, Text) records.
+    d.rdd
+      .repartition(6000)
+      .map(s => (new Text(s._1), new Text(s._2)))
+      .saveAsHadoopFile(
+        sequenceFilePath,
+        classOf[Text],
+        classOf[Text],
+        classOf[SequenceFileOutputFormat[Text, Text]],
+        classOf[GzipCodec]
+      )
+
+  }
+
+}
diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
new file mode 100644
index 000000000..9323c994c
--- /dev/null
+++
b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -0,0 +1,308 @@ +package eu.dnetlib.doiboost + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.merge.AuthorMerger +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset} +import eu.dnetlib.doiboost.mag.ConversionUtil +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.functions.col +import org.apache.spark.sql._ +import org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString} +import org.json4s.jackson.JsonMethods.parse +import org.slf4j.{Logger, LoggerFactory} +import scala.collection.JavaConverters._ + +object SparkGenerateDoiBoost { + + def extractIdGRID(input: String): List[(String, String)] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(input) + + val id: String = (json \ "id").extract[String] + + val grids: List[String] = for { + + JObject(pid) <- json \ "pid" + JField("qualifier", JObject(qualifier)) <- pid + JField("classid", JString(classid)) <- qualifier + JField("value", JString(vl)) <- pid + if classid == "GRID" + } yield vl + grids.map(g => (id, s"unresolved::grid::${g.toLowerCase}"))(collection.breakOut) + } + + def main(args: Array[String]): Unit = { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json") + ) + ) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")) + .getOrCreate() + + import spark.implicits._ + + val 
hostedByMapPath = parser.get("hostedByMapPath") + val workingDirPath = parser.get("workingPath") + val openaireOrganizationPath = parser.get("openaireOrganizationPath") + + val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { + override def zero: Publication = new Publication + + override def reduce(b: Publication, a: (String, Publication)): Publication = { + + if (b == null) { + if (a != null && a._2 != null) { + a._2.setId(a._1) + return a._2 + } + } else { + if (a != null && a._2 != null) { + b.mergeFrom(a._2) + b.setId(a._1) + val authors = AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor) + b.setAuthor(authors) + return b + } + } + new Publication + } + + override def merge(b1: Publication, b2: Publication): Publication = { + if (b1 == null) { + if (b2 != null) + return b2 + } else { + if (b2 != null) { + b1.mergeFrom(b2) + val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) + b1.setAuthor(authors) + if (b2.getId != null && b2.getId.nonEmpty) + b1.setId(b2.getId) + return b1 + } + } + new Publication + } + + override def finish(reduction: Publication): Publication = reduction + + override def bufferEncoder: Encoder[Publication] = Encoders.kryo[Publication] + + override def outputEncoder: Encoder[Publication] = Encoders.kryo[Publication] + } + + implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication] + implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization] + implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset] + implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = + Encoders.tuple(Encoders.STRING, mapEncoderPub) + implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation] + + logger.info("Phase 2) Join Crossref with UnpayWall") + + val crossrefPublication: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p)) + 
val uwPublication: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/uwPublication").as[Publication].map(p => (p.getId, p)) + + def applyMerge(item: ((String, Publication), (String, Publication))): Publication = { + val crossrefPub = item._1._2 + if (item._2 != null) { + val otherPub = item._2._2 + if (otherPub != null) { + crossrefPub.mergeFrom(otherPub) + crossrefPub.setAuthor(AuthorMerger.mergeAuthor(crossrefPub.getAuthor, otherPub.getAuthor)) + } + } + crossrefPub + } + + crossrefPublication + .joinWith(uwPublication, crossrefPublication("_1").equalTo(uwPublication("_1")), "left") + .map(applyMerge) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/firstJoin") + logger.info("Phase 3) Join Result with ORCID") + val fj: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p)) + val orcidPublication: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p)) + fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left") + .map(applyMerge) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/secondJoin") + + logger.info("Phase 4) Join Result with MAG") + val sj: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p)) + + val magPublication: Dataset[(String, Publication)] = + spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p)) + sj.joinWith(magPublication, sj("_1").equalTo(magPublication("_1")), "left") + .map(applyMerge) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostPublication") + + val doiBoostPublication: Dataset[(String, Publication)] = spark.read + .load(s"$workingDirPath/doiBoostPublication") + .as[Publication] + .filter(p => DoiBoostMappingUtil.filterPublication(p)) + .map(DoiBoostMappingUtil.toISSNPair)(tupleForJoinEncoder) + + val 
hostedByDataset: Dataset[(String, HostedByItemType)] = spark.createDataset( + spark.sparkContext.textFile(hostedByMapPath).map(DoiBoostMappingUtil.toHostedByItem) + ) + + doiBoostPublication + .joinWith(hostedByDataset, doiBoostPublication("_1").equalTo(hostedByDataset("_1")), "left") + .map(DoiBoostMappingUtil.fixPublication) + .map(p => (p.getId, p)) + .groupByKey(_._1) + .agg(crossrefAggregator.toColumn) + .map(p => p._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostPublicationFiltered") + + val affiliationPath = parser.get("affiliationPath") + val paperAffiliationPath = parser.get("paperAffiliationPath") + + val affiliation = spark.read + .load(affiliationPath) + .select(col("AffiliationId"), col("GridId"), col("OfficialPage"), col("DisplayName")) + + val paperAffiliation = spark.read + .load(paperAffiliationPath) + .select(col("AffiliationId").alias("affId"), col("PaperId")) + + val a: Dataset[DoiBoostAffiliation] = paperAffiliation + .joinWith(affiliation, paperAffiliation("affId").equalTo(affiliation("AffiliationId"))) + .select( + col("_1.PaperId"), + col("_2.AffiliationId"), + col("_2.GridId"), + col("_2.OfficialPage"), + col("_2.DisplayName") + ) + .as[DoiBoostAffiliation] + + val magPubs: Dataset[(String, Publication)] = spark.read + .load(s"$workingDirPath/doiBoostPublicationFiltered") + .as[Publication] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p))( + tupleForJoinEncoder + ) + .filter(s => s._1 != null) + + magPubs + .joinWith(a, magPubs("_1").equalTo(a("PaperId"))) + .flatMap(item => { + val pub: Publication = item._1._2 + val affiliation = item._2 + val affId: String = + if (affiliation.GridId.isDefined) + s"unresolved::grid::${affiliation.GridId.get.toLowerCase}" + else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) + val r: Relation = new Relation + r.setSource(pub.getId) + r.setTarget(affId) + r.setRelType(ModelConstants.RESULT_ORGANIZATION) + 
r.setRelClass(ModelConstants.HAS_AUTHOR_INSTITUTION) + r.setSubRelType(ModelConstants.AFFILIATION) + r.setDataInfo(pub.getDataInfo) + r.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) + val r1: Relation = new Relation + r1.setTarget(pub.getId) + r1.setSource(affId) + r1.setRelType(ModelConstants.RESULT_ORGANIZATION) + r1.setRelClass(ModelConstants.IS_AUTHOR_INSTITUTION_OF) + r1.setSubRelType(ModelConstants.AFFILIATION) + r1.setDataInfo(pub.getDataInfo) + r1.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) + List(r, r1) + })(mapEncoderRel) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") + + val unresolvedRels: Dataset[(String, Relation)] = spark.read + .load(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") + .as[Relation] + .map(r => { + + if (r.getSource.startsWith("unresolved")) + (r.getSource, r) + else if (r.getTarget.startsWith("unresolved")) + (r.getTarget, r) + else + ("resolved", r) + })(Encoders.tuple(Encoders.STRING, mapEncoderRel)) + + val openaireOrganization: Dataset[(String, String)] = spark.read + .text(openaireOrganizationPath) + .as[String] + .flatMap(s => extractIdGRID(s)) + .groupByKey(_._2) + .reduceGroups((x, y) => if (x != null) x else y) + .map(_._2) + + unresolvedRels + .joinWith(openaireOrganization, unresolvedRels("_1").equalTo(openaireOrganization("_2"))) + .map { x => + val currentRels = x._1._2 + val currentOrgs = x._2 + if (currentOrgs != null) + if (currentRels.getSource.startsWith("unresolved")) + currentRels.setSource(currentOrgs._1) + else + currentRels.setTarget(currentOrgs._1) + currentRels + } + .filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostPublicationAffiliation") + + magPubs + .joinWith(a, magPubs("_1").equalTo(a("PaperId"))) + .map(item => { + val affiliation = item._2 + if 
(affiliation.GridId.isEmpty) { + val o = new Organization + o.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) + o.setDataInfo(DoiBoostMappingUtil.generateDataInfo()) + o.setId(DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString)) + o.setOriginalId(List(affiliation.AffiliationId.toString).asJava) + if (affiliation.DisplayName.nonEmpty) + o.setLegalname(DoiBoostMappingUtil.asField(affiliation.DisplayName.get)) + if (affiliation.OfficialPage.isDefined) + o.setWebsiteurl(DoiBoostMappingUtil.asField(affiliation.OfficialPage.get)) + o.setCountry(ModelConstants.UNKNOWN_COUNTRY) + o + } else + null + }) + .filter(o => o != null) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingDirPath/doiBoostOrganization") + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala new file mode 100644 index 000000000..0cb08ea94 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -0,0 +1,626 @@ +package eu.dnetlib.doiboost.crossref + +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} +import eu.dnetlib.dhp.utils.DHPUtils +import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ +import org.apache.commons.lang.StringUtils +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.JsonAST._ +import org.json4s.jackson.JsonMethods._ +import org.slf4j.{Logger, LoggerFactory} + +import java.util +import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.util.matching.Regex + +case class CrossrefDT(doi: String, json: String, timestamp: Long) {} + +case class mappingAffiliation(name: String) {} + +case class mappingAuthor( + given: 
Option[String], + family: String, + sequence: Option[String], + ORCID: Option[String], + affiliation: Option[mappingAffiliation] +) {} + +case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} + +case object Crossref2Oaf { + val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) + + val mappingCrossrefType = Map( + "book-section" -> "publication", + "book" -> "publication", + "book-chapter" -> "publication", + "book-part" -> "publication", + "book-series" -> "publication", + "book-set" -> "publication", + "book-track" -> "publication", + "edited-book" -> "publication", + "reference-book" -> "publication", + "monograph" -> "publication", + "journal-article" -> "publication", + "dissertation" -> "publication", + "other" -> "publication", + "peer-review" -> "publication", + "proceedings" -> "publication", + "proceedings-article" -> "publication", + "reference-entry" -> "publication", + "report" -> "publication", + "report-series" -> "publication", + "standard" -> "publication", + "standard-series" -> "publication", + "posted-content" -> "publication", + "dataset" -> "dataset" + ) + + val mappingCrossrefSubType = Map( + "book-section" -> "0013 Part of book or chapter of book", + "book" -> "0002 Book", + "book-chapter" -> "0013 Part of book or chapter of book", + "book-part" -> "0013 Part of book or chapter of book", + "book-series" -> "0002 Book", + "book-set" -> "0002 Book", + "book-track" -> "0002 Book", + "edited-book" -> "0002 Book", + "reference-book" -> "0002 Book", + "monograph" -> "0002 Book", + "journal-article" -> "0001 Article", + "dissertation" -> "0044 Thesis", + "other" -> "0038 Other literature type", + "peer-review" -> "0015 Review", + "proceedings" -> "0004 Conference object", + "proceedings-article" -> "0004 Conference object", + "reference-entry" -> "0013 Part of book or chapter of book", + "report" -> "0017 Report", + "report-series" -> "0017 Report", + "standard" -> "0038 Other literature type", + 
"standard-series" -> "0038 Other literature type", + "dataset" -> "0021 Dataset", + "preprint" -> "0016 Preprint", + "report" -> "0017 Report" + ) + + def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + + //MAPPING Crossref DOI into PID + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + result.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava) + + //MAPPING Crossref DOI into OriginalId + //and Other Original Identifier of dataset like clinical-trial-number + val clinicalTrialNumbers = for (JString(ctr) <- json \ "clinical-trial-number") yield ctr + val alternativeIds = for (JString(ids) <- json \ "alternative-id") yield ids + val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi) + + val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava) + result.setOriginalId(originalIds) + + // Add DataInfo + result.setDataInfo(generateDataInfo()) + + result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long]) + result.setDateofcollection((json \ "indexed" \ "date-time").extract[String]) + + result.setCollectedfrom(List(createCrossrefCollectedFrom()).asJava) + + // Publisher ( Name of work's publisher mapped into Result/Publisher) + val publisher = (json \ "publisher").extractOrElse[String](null) + if (publisher != null && publisher.nonEmpty) + result.setPublisher(asField(publisher)) + + // TITLE + val mainTitles = + for { JString(title) <- json \ "title" if title.nonEmpty } yield createSP( + title, + "main title", + ModelConstants.DNET_DATACITE_TITLE + ) + val originalTitles = for { + JString(title) <- json \ "original-title" if title.nonEmpty + } yield createSP(title, "alternative title", ModelConstants.DNET_DATACITE_TITLE) + val shortTitles = for { + JString(title) <- json \ "short-title" if title.nonEmpty + } yield createSP(title, "alternative title", 
ModelConstants.DNET_DATACITE_TITLE) + val subtitles = + for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield createSP( + title, + "subtitle", + ModelConstants.DNET_DATACITE_TITLE + ) + result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava) + + // DESCRIPTION + val descriptionList = + for { JString(description) <- json \ "abstract" } yield asField(description) + result.setDescription(descriptionList.asJava) + + // Source + val sourceList = for { + JString(source) <- json \ "source" if source != null && source.nonEmpty + } yield asField(source) + result.setSource(sourceList.asJava) + + //RELEVANT DATE Mapping + val createdDate = generateDate( + (json \ "created" \ "date-time").extract[String], + (json \ "created" \ "date-parts").extract[List[List[Int]]], + "created", + ModelConstants.DNET_DATACITE_DATE + ) + val postedDate = generateDate( + (json \ "posted" \ "date-time").extractOrElse[String](null), + (json \ "posted" \ "date-parts").extract[List[List[Int]]], + "available", + ModelConstants.DNET_DATACITE_DATE + ) + val acceptedDate = generateDate( + (json \ "accepted" \ "date-time").extractOrElse[String](null), + (json \ "accepted" \ "date-parts").extract[List[List[Int]]], + "accepted", + ModelConstants.DNET_DATACITE_DATE + ) + val publishedPrintDate = generateDate( + (json \ "published-print" \ "date-time").extractOrElse[String](null), + (json \ "published-print" \ "date-parts").extract[List[List[Int]]], + "published-print", + ModelConstants.DNET_DATACITE_DATE + ) + val publishedOnlineDate = generateDate( + (json \ "published-online" \ "date-time").extractOrElse[String](null), + (json \ "published-online" \ "date-parts").extract[List[List[Int]]], + "published-online", + ModelConstants.DNET_DATACITE_DATE + ) + + val issuedDate = extractDate( + (json \ "issued" \ "date-time").extractOrElse[String](null), + (json \ "issued" \ "date-parts").extract[List[List[Int]]] + ) + if (StringUtils.isNotBlank(issuedDate)) { + 
result.setDateofacceptance(asField(issuedDate)) + } else { + result.setDateofacceptance(asField(createdDate.getValue)) + } + result.setRelevantdate( + List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate) + .filter(p => p != null) + .asJava + ) + + //Mapping Subject + val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List()) + + if (subjectList.nonEmpty) { + result.setSubject( + subjectList.map(s => createSP(s, "keywords", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava + ) + } + + //Mapping Author + val authorList: List[mappingAuthor] = + (json \ "author").extractOrElse[List[mappingAuthor]](List()) + + val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => + a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") + ) + + result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => + generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index) + }.asJava) + + // Mapping instance + val instance = new Instance() + val license = for { + JObject(license) <- json \ "license" + JField("URL", JString(lic)) <- license + JField("content-version", JString(content_version)) <- license + } yield (asField(lic), content_version) + val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) + if (l.nonEmpty) { + if (l exists (d => d._2.equals("vor"))) { + for (d <- l) { + if (d._2.equals("vor")) { + instance.setLicense(d._1) + } + } + } else { + instance.setLicense(l.head._1) + } + } + + // Ticket #6281 added pid to Instance + instance.setPid(result.getPid) + + val has_review = json \ "relation" \ "has-review" \ "id" + + if (has_review != JNothing) { + instance.setRefereed( + OafMapperUtils.qualifier( + "0001", + "peerReviewed", + ModelConstants.DNET_REVIEW_LEVELS, + ModelConstants.DNET_REVIEW_LEVELS + ) + ) + } + + instance.setAccessright( + decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue) + ) + instance.setInstancetype( + 
OafMapperUtils.qualifier( + cobjCategory.substring(0, 4), + cobjCategory.substring(5), + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) + result.setResourcetype( + OafMapperUtils.qualifier( + cobjCategory.substring(0, 4), + cobjCategory.substring(5), + ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE + ) + ) + + instance.setCollectedfrom(createCrossrefCollectedFrom()) + if (StringUtils.isNotBlank(issuedDate)) { + instance.setDateofacceptance(asField(issuedDate)) + } else { + instance.setDateofacceptance(asField(createdDate.getValue)) + } + val s: List[String] = List("https://doi.org/" + doi) +// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct +// if (links.nonEmpty) { +// instance.setUrl(links.asJava) +// } + if (s.nonEmpty) { + instance.setUrl(s.asJava) + } + + result.setInstance(List(instance).asJava) + + //IMPORTANT + //The old method result.setId(generateIdentifier(result, doi)) + //is replaced using IdentifierFactory, but the old identifier + //is preserved among the originalId(s) + val oldId = generateIdentifier(result, doi) + result.setId(oldId) + + val newId = IdentifierFactory.createDOIBoostIdentifier(result) + if (!oldId.equalsIgnoreCase(newId)) { + result.getOriginalId.add(oldId) + } + result.setId(newId) + + if (result.getId == null) + null + else + result + } + + def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = { + val a = new Author + a.setName(given) + a.setSurname(family) + a.setFullname(s"$given $family") + a.setRank(index + 1) + if (StringUtils.isNotBlank(orcid)) + a.setPid( + List( + createSP( + orcid, + ModelConstants.ORCID_PENDING, + ModelConstants.DNET_PID_TYPES, + generateDataInfo() + ) + ).asJava + ) + + a + } + + def convert(input: String): List[Oaf] = { + implicit lazy val formats: 
DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: json4s.JValue = parse(input) + + var resultList: List[Oaf] = List() + + val objectType = (json \ "type").extractOrElse[String](null) + val objectSubType = (json \ "subtype").extractOrElse[String](null) + if (objectType == null) + return resultList + + val result = generateItemFromType(objectType, objectSubType) + if (result == null) + return List() + val cOBJCategory = mappingCrossrefSubType.getOrElse( + objectType, + mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type") + ) + mappingResult(result, json, cOBJCategory) + if (result == null || result.getId == null) + return List() + + val funderList: List[mappingFunder] = + (json \ "funder").extractOrElse[List[mappingFunder]](List()) + + if (funderList.nonEmpty) { + resultList = resultList ::: mappingFunderToRelations( + funderList, + result.getId, + createCrossrefCollectedFrom(), + result.getDataInfo, + result.getLastupdatetimestamp + ) + } + + result match { + case publication: Publication => convertPublication(publication, json, cOBJCategory) + case dataset: Dataset => convertDataset(dataset) + } + + resultList = resultList ::: List(result) + resultList + } + + def mappingFunderToRelations( + funders: List[mappingFunder], + sourceId: String, + cf: KeyValue, + di: DataInfo, + ts: Long + ): List[Relation] = { + + val queue = new mutable.Queue[Relation] + + def snsfRule(award: String): String = { + val tmp1 = StringUtils.substringAfter(award, "_") + val tmp2 = StringUtils.substringBefore(tmp1, "/") + logger.debug(s"From $award to $tmp2") + tmp2 + + } + + def extractECAward(award: String): String = { + val awardECRegex: Regex = "[0-9]{4,9}".r + if (awardECRegex.findAllIn(award).hasNext) + return awardECRegex.findAllIn(award).max + null + } + + def generateRelation(sourceId: String, targetId: String, relClass: String): Relation = { + + val r = new Relation + r.setSource(sourceId) + r.setTarget(targetId) + 
r.setRelType(ModelConstants.RESULT_PROJECT) + r.setRelClass(relClass) + r.setSubRelType(ModelConstants.OUTCOME) + r.setCollectedfrom(List(cf).asJava) + r.setDataInfo(di) + r.setLastupdatetimestamp(ts) + r + + } + + def generateSimpleRelationFromAward( + funder: mappingFunder, + nsPrefix: String, + extractField: String => String + ): Unit = { + if (funder.award.isDefined && funder.award.get.nonEmpty) + funder.award.get + .map(extractField) + .filter(a => a != null && a.nonEmpty) + .foreach(award => { + val targetId = getProjectId(nsPrefix, DHPUtils.md5(award)) + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + }) + } + + def getProjectId(nsPrefix: String, targetId: String): String = { + s"40|$nsPrefix::$targetId" + } + + if (funders != null) + funders.foreach(funder => { + if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { + funder.DOI.get match { + case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" | + "10.13039/100010665" => + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" => + generateSimpleRelationFromAward(funder, "corda_______", extractECAward) + case "10.13039/501100000781" => + generateSimpleRelationFromAward(funder, "corda_______", extractECAward) + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + case "10.13039/100000001" => + generateSimpleRelationFromAward(funder, "nsf_________", a => a) + case "10.13039/501100001665" => + generateSimpleRelationFromAward(funder, "anr_________", a => a) + case "10.13039/501100002341" => + generateSimpleRelationFromAward(funder, "aka_________", a => a) + case "10.13039/501100001602" => + generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", "")) + case "10.13039/501100000923" => + 
generateSimpleRelationFromAward(funder, "arc_________", a => a) + case "10.13039/501100000038" => + val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case "10.13039/501100000155" => + val targetId = getProjectId("sshrc_______", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case "10.13039/501100000024" => + val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case "10.13039/501100002848" => + generateSimpleRelationFromAward(funder, "conicytf____", a => a) + case "10.13039/501100003448" => + generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) + case "10.13039/501100010198" => + generateSimpleRelationFromAward(funder, "sgov________", a => a) + case "10.13039/501100004564" => + generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) + case "10.13039/501100003407" => + generateSimpleRelationFromAward(funder, "miur________", a => a) + val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case "10.13039/501100006588" | "10.13039/501100004488" => + generateSimpleRelationFromAward( + funder, + "irb_hr______", + a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", "") + ) + case "10.13039/501100006769" => + generateSimpleRelationFromAward(funder, "rsf_________", a => a) + case "10.13039/501100001711" => + generateSimpleRelationFromAward(funder, "snsf________", 
snsfRule) + case "10.13039/501100004410" => + generateSimpleRelationFromAward(funder, "tubitakf____", a => a) + case "10.10.13039/100004440" => + generateSimpleRelationFromAward(funder, "wt__________", a => a) + case "10.13039/100004440" => + val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + + case _ => logger.debug("no match for " + funder.DOI.get) + + } + + } else { + funder.name match { + case "European Union’s Horizon 2020 research and innovation program" => + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + case "European Union's" => + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + generateSimpleRelationFromAward(funder, "corda_______", extractECAward) + case "The French National Research Agency (ANR)" | "The French National Research Agency" => + generateSimpleRelationFromAward(funder, "anr_________", a => a) + case "CONICYT, Programa de Formación de Capital Humano Avanzado" => + generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) + case "Wellcome Trust Masters Fellowship" => + val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case _ => logger.debug("no match for " + funder.name) + + } + } + + }) + queue.toList + } + + def convertDataset(dataset: Dataset): Unit = { + // TODO check if there are other info to map into the Dataset + } + + def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct + + //Mapping book + if 
(cobjCategory.toLowerCase.contains("book")) { + val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn + if (ISBN.nonEmpty && containerTitles.nonEmpty) { + val source = s"${containerTitles.head} ISBN: ${ISBN.head}" + if (publication.getSource != null) { + val l: List[Field[String]] = publication.getSource.asScala.toList + val ll: List[Field[String]] = l ::: List(asField(source)) + publication.setSource(ll.asJava) + } else + publication.setSource(List(asField(source)).asJava) + } + } else { + // Mapping Journal + + val issnInfos = for { + JArray(issn_types) <- json \ "issn-type" + JObject(issn_type) <- issn_types + JField("type", JString(tp)) <- issn_type + JField("value", JString(vl)) <- issn_type + } yield Tuple2(tp, vl) + + val volume = (json \ "volume").extractOrElse[String](null) + if (containerTitles.nonEmpty) { + val journal = new Journal + journal.setName(containerTitles.head) + if (issnInfos.nonEmpty) { + + issnInfos.foreach(tp => { + tp._1 match { + case "electronic" => journal.setIssnOnline(tp._2) + case "print" => journal.setIssnPrinted(tp._2) + } + }) + } + journal.setVol(volume) + val page = (json \ "page").extractOrElse[String](null) + if (page != null) { + val pp = page.split("-") + if (pp.nonEmpty) + journal.setSp(pp.head) + if (pp.size > 1) + journal.setEp(pp(1)) + } + publication.setJournal(journal) + } + } + } + + def extractDate(dt: String, datePart: List[List[Int]]): String = { + if (StringUtils.isNotBlank(dt)) + return dt + if (datePart != null && datePart.size == 1) { + val res = datePart.head + if (res.size == 3) { + val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d" + if (dp.length == 10) { + return dp + } + } + } + null + + } + + def generateDate( + dt: String, + datePart: List[List[Int]], + classId: String, + schemeId: String + ): StructuredProperty = { + val dp = extractDate(dt, datePart) + if (StringUtils.isNotBlank(dp)) + return createSP(dp, classId, schemeId) + null + } + + def generateItemFromType(objectType: String, 
objectSubType: String): Result = { + if (mappingCrossrefType.contains(objectType)) { + if (mappingCrossrefType(objectType).equalsIgnoreCase("publication")) + return new Publication() + if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset")) + return new Dataset() + } + null + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala similarity index 61% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala index 159b817c7..c6e4706d7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala @@ -6,7 +6,7 @@ import org.apache.commons.io.IOUtils import org.apache.hadoop.io.{IntWritable, Text} import org.apache.spark.SparkConf import org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -16,33 +16,36 @@ object CrossrefDataset { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) - - def to_item(input:String):CrossrefDT = { + def to_item(input: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val ts:Long = (json \ "indexed" \ "timestamp").extract[Long] - val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val ts: Long = (json \ "indexed" \ "timestamp").extract[Long] + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ 
"DOI").extract[String]) CrossrefDT(doi, input, ts) } def main(args: Array[String]): Unit = { - - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + CrossrefDataset.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkMapDumpIntoOAF.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() import spark.implicits._ - val crossrefAggregator = new Aggregator[CrossrefDT, CrossrefDT, CrossrefDT] with Serializable { override def zero: CrossrefDT = null @@ -53,8 +56,7 @@ object CrossrefDataset { if (a == null) return b - - if(a.timestamp >b.timestamp) { + if (a.timestamp > b.timestamp) { return a } b @@ -66,7 +68,7 @@ object CrossrefDataset { if (a == null) return b - if(a.timestamp >b.timestamp) { + if (a.timestamp > b.timestamp) { return a } b @@ -79,21 +81,26 @@ object CrossrefDataset { override def finish(reduction: CrossrefDT): CrossrefDT = reduction } - val workingPath:String = parser.get("workingPath") - - - val main_ds:Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] + val workingPath: String = parser.get("workingPath") + val main_ds: Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] val update = - spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) - .map(i =>CrossrefImporter.decompressBlob(i._2.toString)) - .map(i =>to_item(i))) + spark.createDataset( + spark.sparkContext + .sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) + .map(i => 
CrossrefImporter.decompressBlob(i._2.toString)) + .map(i => to_item(i)) + ) - main_ds.union(update).groupByKey(_.doi) + main_ds + .union(update) + .groupByKey(_.doi) .agg(crossrefAggregator.toColumn) - .map(s=>s._2) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated") + .map(s => s._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/crossref_ds_updated") } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala similarity index 58% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala index 526ff7b3a..df185910e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala @@ -2,17 +2,12 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.doiboost.DoiBoostMappingUtil -import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item -import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass -import org.apache.hadoop.io.{IntWritable, Text} -import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} import org.json4s import org.json4s.DefaultFormats -import org.json4s.JsonAST.JArray -import org.json4s.jackson.JsonMethods.{compact, parse, render} +import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.io.Source @@ -23,12 +18,10 @@ object 
GenerateCrossrefDataset { implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT] - - def crossrefElement(meta: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(meta) - val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long] CrossrefDT(doi, meta, timestamp) @@ -36,13 +29,23 @@ object GenerateCrossrefDataset { def main(args: Array[String]): Unit = { val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json" + ) + ) + .mkString + ) parser.parseArgument(args) val master = parser.get("master") val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val spark: SparkSession = SparkSession.builder().config(conf) + val spark: SparkSession = SparkSession + .builder() + .config(conf) .appName(UnpackCrtossrefEntries.getClass.getSimpleName) .master(master) .getOrCreate() @@ -50,15 +53,17 @@ object GenerateCrossrefDataset { import spark.implicits._ + val tmp: RDD[String] = sc.textFile(sourcePath, 6000) - val tmp : RDD[String] = sc.textFile(sourcePath,6000) - - spark.createDataset(tmp) + spark + .createDataset(tmp) .map(entry => crossrefElement(entry)) - .write.mode(SaveMode.Overwrite).save(targetPath) -// .map(meta => crossrefElement(meta)) -// .toDS.as[CrossrefDT] -// .write.mode(SaveMode.Overwrite).save(targetPath) + .write + .mode(SaveMode.Overwrite) + .save(targetPath) + // .map(meta => crossrefElement(meta)) + // .toDS.as[CrossrefDT] 
+ // .write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala similarity index 54% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index c65916610..96923f000 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -4,13 +4,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset} import org.apache.commons.io.IOUtils - import org.apache.spark.SparkConf - -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - case class Reference(author: String, firstPage: String) {} object SparkMapDumpIntoOAF { @@ -21,14 +18,21 @@ object SparkMapDumpIntoOAF { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkMapDumpIntoOAF.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkMapDumpIntoOAF.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + 
.master(parser.get("master")) + .getOrCreate() implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] @@ -37,19 +41,34 @@ object SparkMapDumpIntoOAF { val targetPath = parser.get("targetPath") - spark.read.load(parser.get("sourcePath")).as[CrossrefDT] + spark.read + .load(parser.get("sourcePath")) + .as[CrossrefDT] .flatMap(k => Crossref2Oaf.convert(k.json)) .filter(o => o != null) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/mixObject") + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/mixObject") - val ds:Dataset[Oaf] = spark.read.load(s"$targetPath/mixObject").as[Oaf] + val ds: Dataset[Oaf] = spark.read.load(s"$targetPath/mixObject").as[Oaf] - ds.filter(o => o.isInstanceOf[Publication]).map(o => o.asInstanceOf[Publication]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefPublication") + ds.filter(o => o.isInstanceOf[Publication]) + .map(o => o.asInstanceOf[Publication]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/crossrefPublication") - ds.filter(o => o.isInstanceOf[Relation]).map(o => o.asInstanceOf[Relation]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefRelation") + ds.filter(o => o.isInstanceOf[Relation]) + .map(o => o.asInstanceOf[Relation]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/crossrefRelation") - ds.filter(o => o.isInstanceOf[OafDataset]).map(o => o.asInstanceOf[OafDataset]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefDataset") + ds.filter(o => o.isInstanceOf[OafDataset]) + .map(o => o.asInstanceOf[OafDataset]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/crossrefDataset") } - } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala similarity index 69% rename from 
dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala index 95ecb568b..3fea9695c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/UnpackCrtossrefEntries.scala @@ -2,8 +2,8 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.hadoop.io.compress.GzipCodec +import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST.JArray @@ -16,39 +16,43 @@ object UnpackCrtossrefEntries { val log: Logger = LoggerFactory.getLogger(UnpackCrtossrefEntries.getClass) - - - - def extractDump(input:String):List[String] = { + def extractDump(input: String): List[String] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val a = (json \ "items").extract[JArray] a.arr.map(s => compact(render(s))) - } - - def main(args: Array[String]): Unit = { val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json" + ) + ) + .mkString + ) parser.parseArgument(args) val master = parser.get("master") val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val spark: SparkSession = SparkSession.builder().config(conf) + val spark: SparkSession = 
SparkSession + .builder() + .config(conf) .appName(UnpackCrtossrefEntries.getClass.getSimpleName) .master(master) .getOrCreate() val sc: SparkContext = spark.sparkContext - sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2)) + sc.wholeTextFiles(sourcePath, 6000) + .flatMap(d => extractDump(d._2)) .saveAsTextFile(targetPath, classOf[GzipCodec]) - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala similarity index 53% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala index fd9629024..18ba864ce 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -1,72 +1,146 @@ package eu.dnetlib.doiboost.mag - import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty} import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex - -case class MagPapers(PaperId: Long, Rank: Integer, Doi: String, - DocType: String, PaperTitle: String, OriginalTitle: String, - BookTitle: String, Year: Option[Integer], Date: Option[java.sql.Timestamp], Publisher: String, - JournalId: Option[Long], ConferenceSeriesId: Option[Long], ConferenceInstanceId: Option[Long], - Volume: String, Issue: String, FirstPage: String, LastPage: String, - ReferenceCount: Option[Long], 
CitationCount: Option[Long], EstimatedCitation: Option[Long], - OriginalVenue: String, FamilyId: Option[Long], CreatedDate: java.sql.Timestamp) {} - +case class MagPapers( + PaperId: Long, + Rank: Integer, + Doi: String, + DocType: String, + PaperTitle: String, + OriginalTitle: String, + BookTitle: String, + Year: Option[Integer], + Date: Option[java.sql.Timestamp], + Publisher: String, + JournalId: Option[Long], + ConferenceSeriesId: Option[Long], + ConferenceInstanceId: Option[Long], + Volume: String, + Issue: String, + FirstPage: String, + LastPage: String, + ReferenceCount: Option[Long], + CitationCount: Option[Long], + EstimatedCitation: Option[Long], + OriginalVenue: String, + FamilyId: Option[Long], + CreatedDate: java.sql.Timestamp +) {} case class MagPaperAbstract(PaperId: Long, IndexedAbstract: String) {} -case class MagAuthor(AuthorId: Long, Rank: Option[Int], NormalizedName: Option[String], DisplayName: Option[String], LastKnownAffiliationId: Option[Long], PaperCount: Option[Long], CitationCount: Option[Long], CreatedDate: Option[java.sql.Timestamp]) {} +case class MagAuthor( + AuthorId: Long, + Rank: Option[Int], + NormalizedName: Option[String], + DisplayName: Option[String], + LastKnownAffiliationId: Option[Long], + PaperCount: Option[Long], + CitationCount: Option[Long], + CreatedDate: Option[java.sql.Timestamp] +) {} -case class MagAffiliation(AffiliationId: Long, Rank: Int, NormalizedName: String, DisplayName: String, GridId: String, OfficialPage: String, WikiPage: String, PaperCount: Long, CitationCount: Long, Latitude: Option[Float], Longitude: Option[Float], CreatedDate: java.sql.Timestamp) {} +case class MagAffiliation( + AffiliationId: Long, + Rank: Int, + NormalizedName: String, + DisplayName: String, + GridId: String, + OfficialPage: String, + WikiPage: String, + PaperCount: Long, + CitationCount: Long, + Latitude: Option[Float], + Longitude: Option[Float], + CreatedDate: java.sql.Timestamp +) {} -case class 
MagPaperAuthorAffiliation(PaperId: Long, AuthorId: Long, AffiliationId: Option[Long], AuthorSequenceNumber: Int, OriginalAuthor: String, OriginalAffiliation: String) {} +case class MagPaperAuthorAffiliation( + PaperId: Long, + AuthorId: Long, + AffiliationId: Option[Long], + AuthorSequenceNumber: Int, + OriginalAuthor: String, + OriginalAffiliation: String +) {} - -case class MagAuthorAffiliation(author: MagAuthor, affiliation:String, sequenceNumber:Int) +case class MagAuthorAffiliation(author: MagAuthor, affiliation: String, sequenceNumber: Int) case class MagPaperWithAuthorList(PaperId: Long, authors: List[MagAuthorAffiliation]) {} -case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String, sequenceNumber:Int) {} +case class MagPaperAuthorDenormalized( + PaperId: Long, + author: MagAuthor, + affiliation: String, + sequenceNumber: Int +) {} -case class MagPaperUrl(PaperId: Long, SourceType: Option[Int], SourceUrl: Option[String], LanguageCode: Option[String]) {} +case class MagPaperUrl( + PaperId: Long, + SourceType: Option[Int], + SourceUrl: Option[String], + LanguageCode: Option[String] +) {} -case class MagUrlInstance(SourceUrl:String){} +case class MagUrlInstance(SourceUrl: String) {} case class MagUrl(PaperId: Long, instances: List[MagUrlInstance]) -case class MagSubject(FieldOfStudyId:Long, DisplayName:String, MainType:Option[String], Score:Float){} +case class MagSubject( + FieldOfStudyId: Long, + DisplayName: String, + MainType: Option[String], + Score: Float +) {} -case class MagFieldOfStudy(PaperId:Long, subjects:List[MagSubject]) {} +case class MagFieldOfStudy(PaperId: Long, subjects: List[MagSubject]) {} -case class MagJournal(JournalId: Long, Rank: Option[Int], NormalizedName: Option[String], DisplayName: Option[String], Issn: Option[String], Publisher: Option[String], Webpage: Option[String], PaperCount: Option[Long], CitationCount: Option[Long], CreatedDate: Option[java.sql.Timestamp]) {} +case class MagJournal( + 
JournalId: Long, + Rank: Option[Int], + NormalizedName: Option[String], + DisplayName: Option[String], + Issn: Option[String], + Publisher: Option[String], + Webpage: Option[String], + PaperCount: Option[Long], + CitationCount: Option[Long], + CreatedDate: Option[java.sql.Timestamp] +) {} - -case class MagConferenceInstance(ci:Long, DisplayName:Option[String], Location:Option[String], StartDate:Option[java.sql.Timestamp], EndDate:Option[java.sql.Timestamp], PaperId:Long){} +case class MagConferenceInstance( + ci: Long, + DisplayName: Option[String], + Location: Option[String], + StartDate: Option[java.sql.Timestamp], + EndDate: Option[java.sql.Timestamp], + PaperId: Long +) {} case object ConversionUtil { - def extractMagIdentifier(pids:mutable.Buffer[String]) :String ={ + def extractMagIdentifier(pids: mutable.Buffer[String]): String = { val magIDRegex: Regex = "^[0-9]+$".r - val s =pids.filter(p=> magIDRegex.findAllIn(p).hasNext) + val s = pids.filter(p => magIDRegex.findAllIn(p).hasNext) if (s.nonEmpty) return s.head null } - - def mergePublication(a: Publication, b:Publication) : Publication = { + def mergePublication(a: Publication, b: Publication): Publication = { if ((a != null) && (b != null)) { a.mergeFrom(b) a @@ -74,10 +148,9 @@ case object ConversionUtil { if (a == null) b else a } - } - def choiceLatestMagArtitcle(p1: MagPapers, p2:MagPapers) :MagPapers = { + def choiceLatestMagArtitcle(p1: MagPapers, p2: MagPapers): MagPapers = { var r = if (p1 == null) p2 else p1 if (p1 != null && p2 != null) { if (p1.CreatedDate != null && p2.CreatedDate != null) { @@ -93,8 +166,9 @@ case object ConversionUtil { } - - def updatePubsWithDescription(inputItem:((String, Publication), MagPaperAbstract)) : Publication = { + def updatePubsWithDescription( + inputItem: ((String, Publication), MagPaperAbstract) + ): Publication = { val pub = inputItem._1._2 val abst = inputItem._2 if (abst != null) { @@ -104,20 +178,22 @@ case object ConversionUtil { } + def 
updatePubsWithConferenceInfo( + inputItem: ((String, Publication), MagConferenceInstance) + ): Publication = { + val publication: Publication = inputItem._1._2 + val ci: MagConferenceInstance = inputItem._2 - def updatePubsWithConferenceInfo(inputItem:((String, Publication), MagConferenceInstance)) : Publication = { - val publication:Publication= inputItem._1._2 - val ci:MagConferenceInstance = inputItem._2 + if (ci != null) { - if (ci!= null){ - - val j:Journal = new Journal + val j: Journal = new Journal if (ci.Location.isDefined) j.setConferenceplace(ci.Location.get) j.setName(ci.DisplayName.get) - if (ci.StartDate.isDefined && ci.EndDate.isDefined) - { - j.setConferencedate(s"${ci.StartDate.get.toString.substring(0,10)} - ${ci.EndDate.get.toString.substring(0,10)}") + if (ci.StartDate.isDefined && ci.EndDate.isDefined) { + j.setConferencedate( + s"${ci.StartDate.get.toString.substring(0, 10)} - ${ci.EndDate.get.toString.substring(0, 10)}" + ) } publication.setJournal(j) @@ -125,7 +201,7 @@ case object ConversionUtil { publication } - def updatePubsWithSubject(item:((String, Publication), MagFieldOfStudy)) : Publication = { + def updatePubsWithSubject(item: ((String, Publication), MagFieldOfStudy)): Publication = { val publication = item._1._2 val fieldOfStudy = item._2 @@ -135,16 +211,34 @@ case object ConversionUtil { val classid = "MAG" val p: List[StructuredProperty] = fieldOfStudy.subjects.flatMap(s => { - val s1 = createSP(s.DisplayName, classid,className, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES) + val s1 = createSP( + s.DisplayName, + classid, + className, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES + ) val di = DoiBoostMappingUtil.generateDataInfo(s.Score.toString) var resList: List[StructuredProperty] = List(s1) if (s.MainType.isDefined) { val maintp = s.MainType.get - val s2 = createSP(s.MainType.get, classid,className, ModelConstants.DNET_SUBJECT_TYPOLOGIES, 
ModelConstants.DNET_SUBJECT_TYPOLOGIES) + val s2 = createSP( + s.MainType.get, + classid, + className, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES + ) s2.setDataInfo(di) resList = resList ::: List(s2) if (maintp.contains(".")) { - val s3 = createSP(maintp.split("\\.").head, classid,className, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES) + val s3 = createSP( + maintp.split("\\.").head, + classid, + className, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES + ) s3.setDataInfo(di) resList = resList ::: List(s3) } @@ -156,25 +250,27 @@ case object ConversionUtil { publication } - - def addInstances(a: (Publication, MagUrl)): Publication = { val pub = a._1 val urls = a._2 - - val i = new Instance + if (urls != null) { - if (urls!= null) { - - val l:List[String] = urls.instances.filter(k=>k.SourceUrl.nonEmpty).map(k=>k.SourceUrl):::List(s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}") + val l: List[String] = urls.instances + .filter(k => k.SourceUrl.nonEmpty) + .map(k => k.SourceUrl) ::: List( + s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}" + ) i.setUrl(l.asJava) - } - else - i.setUrl(List(s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}").asJava) + } else + i.setUrl( + List( + s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}" + ).asJava + ) // Ticket #6281 added pid to Instance i.setPid(pub.getPid) @@ -184,13 +280,13 @@ case object ConversionUtil { pub } - def transformPaperAbstract(input: MagPaperAbstract): MagPaperAbstract = { MagPaperAbstract(input.PaperId, convertInvertedIndexString(input.IndexedAbstract)) } - - def createOAFFromJournalAuthorPaper(inputParams: ((MagPapers, MagJournal), MagPaperWithAuthorList)): Publication = { + def createOAFFromJournalAuthorPaper( + inputParams: 
((MagPapers, MagJournal), MagPaperWithAuthorList) + ): Publication = { val paper = inputParams._1._1 val journal = inputParams._1._2 val authors = inputParams._2 @@ -206,31 +302,37 @@ case object ConversionUtil { pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub)) val mainTitles = createSP(paper.PaperTitle, "main title", ModelConstants.DNET_DATACITE_TITLE) - val originalTitles = createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE) + val originalTitles = + createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE) pub.setTitle(List(mainTitles, originalTitles).asJava) pub.setSource(List(asField(paper.BookTitle)).asJava) val authorsOAF = authors.authors.map { f: MagAuthorAffiliation => - val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author a.setRank(f.sequenceNumber) if (f.author.DisplayName.isDefined) a.setFullname(f.author.DisplayName.get) - if(f.affiliation!= null) + if (f.affiliation != null) a.setAffiliation(List(asField(f.affiliation)).asJava) - a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", ModelConstants.DNET_PID_TYPES)).asJava) + a.setPid( + List( + createSP( + s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", + "URL", + ModelConstants.DNET_PID_TYPES + ) + ).asJava + ) a } pub.setAuthor(authorsOAF.asJava) - if (paper.Date != null && paper.Date.isDefined) { - pub.setDateofacceptance(asField(paper.Date.get.toString.substring(0,10))) + pub.setDateofacceptance(asField(paper.Date.get.toString.substring(0, 10))) } pub.setPublisher(asField(paper.Publisher)) - if (journal != null && journal.DisplayName.isDefined) { val j = new Journal @@ -250,8 +352,9 @@ case object ConversionUtil { pub } - - def createOAF(inputParams: ((MagPapers, MagPaperWithAuthorList), MagPaperAbstract)): Publication = { + def createOAF( + inputParams: ((MagPapers, MagPaperWithAuthorList), MagPaperAbstract) + ): Publication = { val paper = 
inputParams._1._1 val authors = inputParams._1._2 @@ -268,46 +371,48 @@ case object ConversionUtil { pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub)) val mainTitles = createSP(paper.PaperTitle, "main title", ModelConstants.DNET_DATACITE_TITLE) - val originalTitles = createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE) + val originalTitles = + createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE) pub.setTitle(List(mainTitles, originalTitles).asJava) pub.setSource(List(asField(paper.BookTitle)).asJava) - if (description != null) { pub.setDescription(List(asField(description.IndexedAbstract)).asJava) } - val authorsOAF = authors.authors.map { f: MagAuthorAffiliation => - val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author a.setFullname(f.author.DisplayName.get) - if(f.affiliation!= null) + if (f.affiliation != null) a.setAffiliation(List(asField(f.affiliation)).asJava) - - a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", ModelConstants.DNET_PID_TYPES)).asJava) + a.setPid( + List( + createSP( + s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", + "URL", + ModelConstants.DNET_PID_TYPES + ) + ).asJava + ) a } - if (paper.Date != null) { - pub.setDateofacceptance(asField(paper.Date.toString.substring(0,10))) + pub.setDateofacceptance(asField(paper.Date.toString.substring(0, 10))) } pub.setAuthor(authorsOAF.asJava) - pub } - def convertInvertedIndexString(json_input: String): String = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(json_input) @@ -317,13 +422,13 @@ case object ConversionUtil { val iid = (json \ "InvertedIndex").extract[Map[String, List[Int]]] - for {(k: String, v: List[Int]) <- iid} { + for { (k: String, v: List[Int]) <- iid } { v.foreach(item => res(item) = k) } - (0 until idl).foreach(i => { - if (res(i) == null) - res(i) = "" - 
}) + (0 until idl).foreach(i => { + if (res(i) == null) + res(i) = "" + }) return res.mkString(" ") } "" diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala new file mode 100644 index 000000000..316bd91ac --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -0,0 +1,297 @@ +package eu.dnetlib.doiboost.mag + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{SaveMode, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +object SparkImportMagIntoDataset { + + val datatypedict = Map( + "bool" -> BooleanType, + "int" -> IntegerType, + "uint" -> IntegerType, + "long" -> LongType, + "ulong" -> LongType, + "float" -> FloatType, + "string" -> StringType, + "DateTime" -> DateType + ) + + val stream = Map( + "Affiliations" -> Tuple2( + "mag/Affiliations.txt", + Seq( + "AffiliationId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "GridId:string", + "OfficialPage:string", + "WikiPage:string", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "Iso3166Code:string", + "Latitude:float?", + "Longitude:float?", + "CreatedDate:DateTime" + ) + ), + "AuthorExtendedAttributes" -> Tuple2( + "mag/AuthorExtendedAttributes.txt", + Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string") + ), + "Authors" -> Tuple2( + "mag/Authors.txt", + Seq( + "AuthorId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "LastKnownAffiliationId:long?", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "ConferenceInstances" -> Tuple2( + "mag/ConferenceInstances.txt", + Seq( + 
"ConferenceInstanceId:long", + "NormalizedName:string", + "DisplayName:string", + "ConferenceSeriesId:long", + "Location:string", + "OfficialUrl:string", + "StartDate:DateTime?", + "EndDate:DateTime?", + "AbstractRegistrationDate:DateTime?", + "SubmissionDeadlineDate:DateTime?", + "NotificationDueDate:DateTime?", + "FinalVersionDueDate:DateTime?", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "Latitude:float?", + "Longitude:float?", + "CreatedDate:DateTime" + ) + ), + "ConferenceSeries" -> Tuple2( + "mag/ConferenceSeries.txt", + Seq( + "ConferenceSeriesId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "EntityRelatedEntities" -> Tuple2( + "advanced/EntityRelatedEntities.txt", + Seq( + "EntityId:long", + "EntityType:string", + "RelatedEntityId:long", + "RelatedEntityType:string", + "RelatedType:int", + "Score:float" + ) + ), + "FieldOfStudyChildren" -> Tuple2( + "advanced/FieldOfStudyChildren.txt", + Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long") + ), + "FieldOfStudyExtendedAttributes" -> Tuple2( + "advanced/FieldOfStudyExtendedAttributes.txt", + Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string") + ), + "FieldsOfStudy" -> Tuple2( + "advanced/FieldsOfStudy.txt", + Seq( + "FieldOfStudyId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "MainType:string", + "Level:int", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "Journals" -> Tuple2( + "mag/Journals.txt", + Seq( + "JournalId:long", + "Rank:uint", + "NormalizedName:string", + "DisplayName:string", + "Issn:string", + "Publisher:string", + "Webpage:string", + "PaperCount:long", + "PaperFamilyCount:long", + "CitationCount:long", + "CreatedDate:DateTime" + ) + ), + "PaperAbstractsInvertedIndex" -> Tuple2( + "nlp/PaperAbstractsInvertedIndex.txt.*", 
+ Seq("PaperId:long", "IndexedAbstract:string") + ), + "PaperAuthorAffiliations" -> Tuple2( + "mag/PaperAuthorAffiliations.txt", + Seq( + "PaperId:long", + "AuthorId:long", + "AffiliationId:long?", + "AuthorSequenceNumber:uint", + "OriginalAuthor:string", + "OriginalAffiliation:string" + ) + ), + "PaperCitationContexts" -> Tuple2( + "nlp/PaperCitationContexts.txt", + Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string") + ), + "PaperExtendedAttributes" -> Tuple2( + "mag/PaperExtendedAttributes.txt", + Seq("PaperId:long", "AttributeType:int", "AttributeValue:string") + ), + "PaperFieldsOfStudy" -> Tuple2( + "advanced/PaperFieldsOfStudy.txt", + Seq("PaperId:long", "FieldOfStudyId:long", "Score:float") + ), + "PaperMeSH" -> Tuple2( + "advanced/PaperMeSH.txt", + Seq( + "PaperId:long", + "DescriptorUI:string", + "DescriptorName:string", + "QualifierUI:string", + "QualifierName:string", + "IsMajorTopic:bool" + ) + ), + "PaperRecommendations" -> Tuple2( + "advanced/PaperRecommendations.txt", + Seq("PaperId:long", "RecommendedPaperId:long", "Score:float") + ), + "PaperReferences" -> Tuple2( + "mag/PaperReferences.txt", + Seq("PaperId:long", "PaperReferenceId:long") + ), + "PaperResources" -> Tuple2( + "mag/PaperResources.txt", + Seq( + "PaperId:long", + "ResourceType:int", + "ResourceUrl:string", + "SourceUrl:string", + "RelationshipType:int" + ) + ), + "PaperUrls" -> Tuple2( + "mag/PaperUrls.txt", + Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string") + ), + "Papers" -> Tuple2( + "mag/Papers.txt", + Seq( + "PaperId:long", + "Rank:uint", + "Doi:string", + "DocType:string", + "PaperTitle:string", + "OriginalTitle:string", + "BookTitle:string", + "Year:int?", + "Date:DateTime?", + "OnlineDate:DateTime?", + "Publisher:string", + "JournalId:long?", + "ConferenceSeriesId:long?", + "ConferenceInstanceId:long?", + "Volume:string", + "Issue:string", + "FirstPage:string", + "LastPage:string", + "ReferenceCount:long", + 
"CitationCount:long", + "EstimatedCitation:long", + "OriginalVenue:string", + "FamilyId:long?", + "FamilyRank:uint?", + "DocSubTypes:string", + "CreatedDate:DateTime" + ) + ), + "RelatedFieldOfStudy" -> Tuple2( + "advanced/RelatedFieldOfStudy.txt", + Seq( + "FieldOfStudyId1:long", + "Type1:string", + "FieldOfStudyId2:long", + "Type2:string", + "Rank:float" + ) + ) + ) + + def getSchema(streamName: String): StructType = { + var schema = new StructType() + val d: Seq[String] = stream(streamName)._2 + d.foreach { case t => + val currentType = t.split(":") + val fieldName: String = currentType.head + var fieldType: String = currentType.last + val nullable: Boolean = fieldType.endsWith("?") + if (nullable) + fieldType = fieldType.replace("?", "") + schema = schema.add(StructField(fieldName, datatypedict(fieldType), nullable)) + } + schema + } + + def main(args: Array[String]): Unit = { + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/convert_mag_to_oaf_params.json") + ) + ) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")) + .getOrCreate() + + stream.foreach { case (k, v) => + val s: StructType = getSchema(k) + val df = spark.read + .option("header", "false") + .option("charset", "UTF8") + .option("delimiter", "\t") + .schema(s) + .csv(s"${parser.get("sourcePath")}/${v._1}") + logger.info(s"Converting $k") + + df.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/$k") + } + + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala new file mode 100644 index 000000000..eae669853 --- /dev/null +++ 
b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -0,0 +1,241 @@ +package eu.dnetlib.doiboost.mag + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.Publication +import eu.dnetlib.doiboost.DoiBoostMappingUtil +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.functions.{col, collect_list, struct} +import org.apache.spark.sql._ +import org.slf4j.{Logger, LoggerFactory} +import scala.collection.JavaConverters._ + +object SparkProcessMAG { + + def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = { + d.where(col("Doi").isNotNull) + .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) + .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2)) + .map(_._2)(Encoders.product[MagPapers]) + .map(mp => { + MagPapers( + mp.PaperId, + mp.Rank, + DoiBoostMappingUtil.normalizeDoi(mp.Doi), + mp.DocType, + mp.PaperTitle, + mp.OriginalTitle, + mp.BookTitle, + mp.Year, + mp.Date, + mp.Publisher: String, + mp.JournalId, + mp.ConferenceSeriesId, + mp.ConferenceInstanceId, + mp.Volume, + mp.Issue, + mp.FirstPage, + mp.LastPage, + mp.ReferenceCount, + mp.CitationCount, + mp.EstimatedCitation, + mp.OriginalVenue, + mp.FamilyId, + mp.CreatedDate + ) + })(Encoders.product[MagPapers]) + } + + def main(args: Array[String]): Unit = { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json") + ) + ) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")) + .getOrCreate() + + val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") + val targetPath 
= parser.get("targetPath") + + import spark.implicits._ + implicit val mapEncoderPubs: Encoder[Publication] = + org.apache.spark.sql.Encoders.kryo[Publication] + implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = + Encoders.tuple(Encoders.STRING, mapEncoderPubs) + + logger.info("Phase 1) make uninue DOI in Papers:") + val d: Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers] + + // Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one + val distinctPaper: Dataset[MagPapers] = getDistinctResults(d) + + distinctPaper.write.mode(SaveMode.Overwrite).save(s"$workingPath/Papers_distinct") + + logger.info("Phase 0) Enrich Publication with description") + val pa = spark.read.load(s"$sourcePath/PaperAbstractsInvertedIndex").as[MagPaperAbstract] + pa.map(ConversionUtil.transformPaperAbstract) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/PaperAbstract") + + logger.info("Phase 3) Group Author by PaperId") + val authors = spark.read.load(s"$sourcePath/Authors").as[MagAuthor] + + val affiliation = spark.read.load(s"$sourcePath/Affiliations").as[MagAffiliation] + val paperAuthorAffiliation = + spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation] + + paperAuthorAffiliation + .joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId"))) + .map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => + (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null, a.AuthorSequenceNumber)) + } + .joinWith(affiliation, affiliation("AffiliationId").equalTo(col("_1")), "left") + .map(s => { + val mpa = s._1._2 + val af = s._2 + if (af != null) { + MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName, mpa.sequenceNumber) + } else + mpa + }) + .groupBy("PaperId") + .agg(collect_list(struct($"author", $"affiliation", $"sequenceNumber")).as("authors")) + .write + .mode(SaveMode.Overwrite) + 
.save(s"$workingPath/merge_step_1_paper_authors") + + logger.info( + "Phase 4) create First Version of publication Entity with Paper Journal and Authors" + ) + + val journals = spark.read.load(s"$sourcePath/Journals").as[MagJournal] + + val papers = spark.read.load(s"$workingPath/Papers_distinct").as[MagPapers] + + val paperWithAuthors = + spark.read.load(s"$workingPath/merge_step_1_paper_authors").as[MagPaperWithAuthorList] + + val firstJoin = + papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")), "left") + firstJoin + .joinWith( + paperWithAuthors, + firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), + "left" + ) + .map { a => ConversionUtil.createOAFFromJournalAuthorPaper(a) } + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/merge_step_2") + + var magPubs: Dataset[(String, Publication)] = + spark.read + .load(s"$workingPath/merge_step_2") + .as[Publication] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)) + .as[(String, Publication)] + + val conference = spark.read + .load(s"$sourcePath/ConferenceInstances") + .select( + $"ConferenceInstanceId".as("ci"), + $"DisplayName", + $"Location", + $"StartDate", + $"EndDate" + ) + val conferenceInstance = conference + .joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci"))) + .select( + $"_1.ci", + $"_1.DisplayName", + $"_1.Location", + $"_1.StartDate", + $"_1.EndDate", + $"_2.PaperId" + ) + .as[MagConferenceInstance] + + magPubs + .joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left") + .map(item => ConversionUtil.updatePubsWithConferenceInfo(item)) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/merge_step_3") + + val paperAbstract = spark.read.load(s"$workingPath/PaperAbstract").as[MagPaperAbstract] + + magPubs = spark.read + .load(s"$workingPath/merge_step_3") + .as[Publication] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)) + .as[(String, 
Publication)] + + magPubs + .joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") + .map(item => ConversionUtil.updatePubsWithDescription(item)) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/merge_step_4") + + logger.info("Phase 7) Enrich Publication with FieldOfStudy") + + magPubs = spark.read + .load(s"$workingPath/merge_step_4") + .as[Publication] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)) + .as[(String, Publication)] + + val fos = spark.read + .load(s"$sourcePath/FieldsOfStudy") + .select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType") + + val pfos = spark.read.load(s"$sourcePath/PaperFieldsOfStudy") + + val paperField = pfos + .joinWith(fos, fos("fos").equalTo(pfos("FieldOfStudyId"))) + .select($"_1.FieldOfStudyId", $"_2.DisplayName", $"_2.MainType", $"_1.PaperId", $"_1.Score") + .groupBy($"PaperId") + .agg( + collect_list(struct($"FieldOfStudyId", $"DisplayName", $"MainType", $"Score")) + .as("subjects") + ) + .as[MagFieldOfStudy] + + magPubs + .joinWith( + paperField, + col("_1") + .equalTo(paperField("PaperId")), + "left" + ) + .map(item => ConversionUtil.updatePubsWithSubject(item)) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/mag_publication") + + spark.read + .load(s"$workingPath/mag_publication") + .as[Publication] + .filter(p => p.getId != null) + .groupByKey(p => p.getId) + .reduceGroups((a: Publication, b: Publication) => ConversionUtil.mergePublication(a, b)) + .map(_._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/magPublication") + + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala similarity index 66% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index 
1cd3f7028..7c58afc09 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -4,27 +4,31 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication} -import eu.dnetlib.dhp.schema.orcid.{AuthorData, OrcidDOI} import eu.dnetlib.doiboost.DoiBoostMappingUtil import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo} import org.apache.commons.lang.StringUtils -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ import org.json4s import org.json4s.DefaultFormats import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods._ +import org.slf4j.{Logger, LoggerFactory} +import scala.collection.JavaConverters._ -case class ORCIDItem(doi:String, authors:List[OrcidAuthor]){} -case class OrcidAuthor(oid:String, name:Option[String], surname:Option[String], creditName:Option[String], otherNames:Option[List[String]], errorCode:Option[String]){} -case class OrcidWork(oid:String, doi:String) +case class ORCIDItem(doi: String, authors: List[OrcidAuthor]) {} +case class OrcidAuthor( + oid: String, + name: Option[String], + surname: Option[String], + creditName: Option[String], + otherNames: Option[List[String]], + errorCode: Option[String] +) {} +case class OrcidWork(oid: String, doi: String) +case class ORCIDElement(doi: String, authors: List[ORCIDItem]) {} - -case class ORCIDElement(doi:String, authors:List[ORCIDItem]) {} object ORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass) val mapper = new ObjectMapper() @@ -42,7 +46,7 @@ object ORCIDToOAF { def extractValueFromInputString(input: String): (String, String) = { val i = input.indexOf('[') - if (i <5) { + if (i < 5) { return null } val orcidList = 
input.substring(i, input.length - 1) @@ -52,17 +56,16 @@ object ORCIDToOAF { } else null } - - def strValid(s:Option[String]) : Boolean = { + def strValid(s: Option[String]): Boolean = { s.isDefined && s.get.nonEmpty } - def authorValid(author:OrcidAuthor): Boolean ={ + def authorValid(author: OrcidAuthor): Boolean = { if (strValid(author.name) && strValid(author.surname)) { return true } if (strValid(author.surname)) { - return true + return true } if (strValid(author.creditName)) { return true @@ -71,37 +74,35 @@ object ORCIDToOAF { false } - - def extractDOIWorks(input:String): List[OrcidWork] = { + def extractDOIWorks(input: String): List[OrcidWork] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val oid = (json \ "workDetail" \"oid").extractOrElse[String](null) + val oid = (json \ "workDetail" \ "oid").extractOrElse[String](null) if (oid == null) return List() - val doi:List[(String, String)] = for { - JObject(extIds) <- json \ "workDetail" \"extIds" + val doi: List[(String, String)] = for { + JObject(extIds) <- json \ "workDetail" \ "extIds" JField("type", JString(typeValue)) <- extIds - JField("value", JString(value)) <- extIds + JField("value", JString(value)) <- extIds if "doi".equalsIgnoreCase(typeValue) } yield (typeValue, DoiBoostMappingUtil.normalizeDoi(value)) if (doi.nonEmpty) { - return doi.map(l =>OrcidWork(oid, l._2)) + return doi.map(l => OrcidWork(oid, l._2)) } List() } - def convertORCIDAuthor(input:String): OrcidAuthor = { + def convertORCIDAuthor(input: String): OrcidAuthor = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - (json \"authorData" ).extractOrElse[OrcidAuthor](null) - } + (json \ "authorData").extractOrElse[OrcidAuthor](null) + } - - def convertTOOAF(input:ORCIDItem) :Publication = { + def convertTOOAF(input: ORCIDItem): Publication = { val doi = input.doi - val pub:Publication = 
new Publication + val pub: Publication = new Publication pub.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava) pub.setDataInfo(generateDataInfo()) @@ -109,9 +110,9 @@ object ORCIDToOAF { if (pub.getId == null) return null - try{ + try { - val l:List[Author]= input.authors.map(a=> { + val l: List[Author] = input.authors.map(a => { generateAuthor(a) })(collection.breakOut) @@ -126,30 +127,38 @@ object ORCIDToOAF { } } - def generateOricPIDDatainfo():DataInfo = { - val di =DoiBoostMappingUtil.generateDataInfo("0.91") + def generateOricPIDDatainfo(): DataInfo = { + val di = DoiBoostMappingUtil.generateDataInfo("0.91") di.getProvenanceaction.setClassid(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY) di.getProvenanceaction.setClassname(ModelConstants.HARVESTED) di } - def generateAuthor(o : OrcidAuthor): Author = { + def generateAuthor(o: OrcidAuthor): Author = { val a = new Author if (strValid(o.name)) { - a.setName(o.name.get.capitalize) + a.setName(o.name.get.capitalize) } if (strValid(o.surname)) { a.setSurname(o.surname.get.capitalize) } - if(strValid(o.name) && strValid(o.surname)) + if (strValid(o.name) && strValid(o.surname)) a.setFullname(s"${o.name.get.capitalize} ${o.surname.get.capitalize}") else if (strValid(o.creditName)) a.setFullname(o.creditName.get) if (StringUtils.isNotBlank(o.oid)) - a.setPid(List(createSP(o.oid, ModelConstants.ORCID, ModelConstants.DNET_PID_TYPES, generateOricPIDDatainfo())).asJava) + a.setPid( + List( + createSP( + o.oid, + ModelConstants.ORCID, + ModelConstants.DNET_PID_TYPES, + generateOricPIDDatainfo() + ) + ).asJava + ) a } - -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala similarity index 65% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala rename to 
dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index fa4a93e00..95a1f5a19 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -10,11 +10,11 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - - def run(spark:SparkSession, workingPath:String, targetPath:String) :Unit = { + def run(spark: SparkSession, workingPath: String, targetPath: String): Unit = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + val dataset: Dataset[ORCIDItem] = + spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] logger.info("Converting ORCID to OAF") dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) @@ -22,21 +22,27 @@ object SparkConvertORCIDToOAF { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkConvertORCIDToOAF.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - run(spark,workingPath, targetPath) + run(spark, workingPath, targetPath) 
} -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala new file mode 100644 index 000000000..7b6408417 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -0,0 +1,94 @@ +package eu.dnetlib.doiboost.orcid + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.Publication +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.functions.{col, collect_list} +import org.apache.spark.sql._ +import org.slf4j.{Logger, LoggerFactory} + +object SparkPreprocessORCID { + val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) + + def fixORCIDItem(item: ORCIDItem): ORCIDItem = { + ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) + + } + + def run(spark: SparkSession, sourcePath: String, workingPath: String): Unit = { + import spark.implicits._ + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] + + val inputRDD: RDD[OrcidAuthor] = spark.sparkContext + .textFile(s"$sourcePath/authors") + .map(s => ORCIDToOAF.convertORCIDAuthor(s)) + .filter(s => s != null) + .filter(s => ORCIDToOAF.authorValid(s)) + + spark + .createDataset(inputRDD) + .as[OrcidAuthor] + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/author") + + val res = spark.sparkContext + .textFile(s"$sourcePath/works") + .flatMap(s => ORCIDToOAF.extractDOIWorks(s)) + .filter(s => s != null) + + spark + .createDataset(res) + .as[OrcidWork] + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/works") + + val authors: Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] + + val works: Dataset[OrcidWork] = 
spark.read.load(s"$workingPath/works").as[OrcidWork] + + works + .joinWith(authors, authors("oid").equalTo(works("oid"))) + .map(i => { + val doi = i._1.doi + val author = i._2 + (doi, author) + }) + .groupBy(col("_1").alias("doi")) + .agg(collect_list(col("_2")).alias("authors")) + .as[ORCIDItem] + .map(s => fixORCIDItem(s)) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/orcidworksWithAuthor") + } + + def main(args: Array[String]): Unit = { + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkConvertORCIDToOAF.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json" + ) + ) + ) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")) + .getOrCreate() + + val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") + + run(spark, sourcePath, workingPath) + + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala similarity index 64% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index 4530926f1..9f7f9d18f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -1,42 +1,47 @@ package eu.dnetlib.doiboost.uw import eu.dnetlib.dhp.application.ArgumentApplicationParser - import eu.dnetlib.dhp.schema.oaf.Publication import eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD 
-import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} - object SparkMapUnpayWallToOAF { def main(args: Array[String]): Unit = { - val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + SparkMapDumpIntoOAF.getClass.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val inputRDD:RDD[String] = spark.sparkContext.textFile(s"$sourcePath") + val inputRDD: RDD[String] = spark.sparkContext.textFile(s"$sourcePath") logger.info("Converting UnpayWall to OAF") - val d:Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p=>p!=null)).as[Publication] + val d: Dataset[Publication] = spark + .createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p => p != null)) + .as[Publication] d.write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala similarity index 70% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala rename to dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala 
index c8324cde1..bbdc80b1d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -4,42 +4,49 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{AccessRight, Instance, OpenAccessRoute, Publication} import eu.dnetlib.doiboost.DoiBoostMappingUtil +import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ -import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color - - - -case class OALocation(evidence:Option[String], host_type:Option[String], is_best:Option[Boolean], license: Option[String], pmh_id:Option[String], updated:Option[String], - url:Option[String], url_for_landing_page:Option[String], url_for_pdf:Option[String], version:Option[String]) {} - - +case class OALocation( + evidence: Option[String], + host_type: Option[String], + is_best: Option[Boolean], + license: Option[String], + pmh_id: Option[String], + updated: Option[String], + url: Option[String], + url_for_landing_page: Option[String], + url_for_pdf: Option[String], + version: Option[String] +) {} object UnpayWallToOAF { val logger: Logger = LoggerFactory.getLogger(getClass) - - def get_unpaywall_color(input:String):Option[OpenAccessRoute] = { - if(input == null || input.equalsIgnoreCase("close")) + def get_unpaywall_color(input: String): Option[OpenAccessRoute] = { + if (input == null || input.equalsIgnoreCase("close")) return None - if(input.equalsIgnoreCase("green")) + if (input.equalsIgnoreCase("green")) return Some(OpenAccessRoute.green) - if(input.equalsIgnoreCase("bronze")) + if (input.equalsIgnoreCase("bronze")) return Some(OpenAccessRoute.bronze) 
- if(input.equalsIgnoreCase("hybrid")) + if (input.equalsIgnoreCase("hybrid")) return Some(OpenAccessRoute.hybrid) else return Some(OpenAccessRoute.gold) } - def get_color(is_oa:Boolean, location: OALocation, journal_is_oa:Boolean):Option[OpenAccessRoute] = { + def get_color( + is_oa: Boolean, + location: OALocation, + journal_is_oa: Boolean + ): Option[OpenAccessRoute] = { if (is_oa) { if (location.host_type.isDefined) { { @@ -63,23 +70,22 @@ object UnpayWallToOAF { None } - - def convertToOAF(input:String):Publication = { + def convertToOAF(input: String): Publication = { val pub = new Publication implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val doi = DoiBoostMappingUtil.normalizeDoi((json \"doi").extract[String]) + val doi = DoiBoostMappingUtil.normalizeDoi((json \ "doi").extract[String]) - if(doi == null) + if (doi == null) return null - val is_oa = (json\ "is_oa").extract[Boolean] + val is_oa = (json \ "is_oa").extract[Boolean] - val journal_is_oa= (json\ "journal_is_oa").extract[Boolean] + val journal_is_oa = (json \ "journal_is_oa").extract[Boolean] - val oaLocation:OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null) + val oaLocation: OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null) val colour = get_unpaywall_color((json \ "oa_status").extractOrElse[String](null)) @@ -89,9 +95,9 @@ object UnpayWallToOAF { if (!is_oa) return null - if(oaLocation== null || oaLocation.url.isEmpty) - return null - val i :Instance= new Instance() + if (oaLocation == null || oaLocation.url.isEmpty) + return null + val i: Instance = new Instance() i.setCollectedfrom(createUnpayWallCollectedFrom()) // i.setAccessright(getOpenAccessQualifier()) @@ -123,7 +129,4 @@ object UnpayWallToOAF { } - - - } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala 
b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala deleted file mode 100644 index 4912648be..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala +++ /dev/null @@ -1,70 +0,0 @@ -package eu.dnetlib.dhp.doiboost - -import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset} -import eu.dnetlib.doiboost.{DoiBoostMappingUtil, HostedByItemType} -import eu.dnetlib.doiboost.SparkGenerateDoiBoost.getClass -import eu.dnetlib.doiboost.mag.ConversionUtil -import eu.dnetlib.doiboost.orcid.ORCIDElement -import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} -import org.junit.jupiter.api.Test - -import scala.io.Source - -class DoiBoostHostedByMapTest { - - - -// @Test -// def testMerge():Unit = { -// val conf: SparkConf = new SparkConf() -// val spark: SparkSession = -// SparkSession -// .builder() -// .config(conf) -// .appName(getClass.getSimpleName) -// .master("local[*]").getOrCreate() -// -// -// -// implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication] -// implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset] -// implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPub) -// -// -// import spark.implicits._ -// val dataset:RDD[String]= spark.sparkContext.textFile("/home/sandro/Downloads/hbMap.gz") -// -// -// val hbMap:Dataset[(String, HostedByItemType)] =spark.createDataset(dataset.map(DoiBoostMappingUtil.toHostedByItem)) -// -// -// hbMap.show() -// -// -// -// -// -// -// -// -// -// -// } - - - @Test - def idDSGeneration():Unit = { - val s ="doajarticles::0066-782X" - - - - println(DoiBoostMappingUtil.generateDSId(s)) - - - } - - -} diff --git 
a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala new file mode 100644 index 000000000..61d2eef29 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/DoiBoostHostedByMapTest.scala @@ -0,0 +1,16 @@ +package eu.dnetlib.dhp.doiboost + +import eu.dnetlib.doiboost.DoiBoostMappingUtil +import org.junit.jupiter.api.Test + +class DoiBoostHostedByMapTest { + + @Test + def idDSGeneration(): Unit = { + val s = "doajarticles::0066-782X" + + println(DoiBoostMappingUtil.generateDSId(s)) + + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala similarity index 80% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala index a9a841ee9..391d45b10 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala @@ -6,41 +6,39 @@ import org.junit.jupiter.api.Test class NormalizeDOITest { @Test - def doiDSLowerCase():Unit = { - val doi ="10.1042/BCJ20160876" + def doiDSLowerCase(): Unit = { + val doi = "10.1042/BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi).equals(doi.toLowerCase())) } - @Test - def doiFiltered():Unit = { + def doiFiltered(): Unit = { val doi = "0.1042/BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi) == null) } @Test - def doiFiltered2():Unit = { + def doiFiltered2(): Unit = { val doi = "https://doi.org/0.1042/BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi) == null) } - @Test - def doiCleaned():Unit = { + def doiCleaned(): Unit = { val doi = 
"https://doi.org/10.1042/BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi).equals("10.1042/BCJ20160876".toLowerCase())) } @Test - def doiCleaned1():Unit = { + def doiCleaned1(): Unit = { val doi = "https://doi.org/10.1042/ BCJ20160876" assert(DoiBoostMappingUtil.normalizeDoi(doi).equals("10.1042/BCJ20160876".toLowerCase())) } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala similarity index 54% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index 5ef92cfa4..8124a5aae 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -1,7 +1,8 @@ -package eu.dnetlib.doiboost.crossref +package eu.dnetlib.dhp.doiboost.crossref import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.utils.DHPUtils +import eu.dnetlib.doiboost.crossref.Crossref2Oaf import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test @@ -11,20 +12,24 @@ import scala.collection.JavaConverters._ import scala.io.Source import scala.util.matching.Regex - class CrossrefMappingTest { val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val mapper = new ObjectMapper() - - @Test def testFunderRelationshipsMapping(): Unit = { - val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString - val funder_doi = Source.fromInputStream(getClass.getResourceAsStream("funder_doi")).mkString - val funder_name = 
Source.fromInputStream(getClass.getResourceAsStream("funder_doi")).mkString - + val template = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article_funder_template.json") + ) + .mkString + val funder_doi = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")) + .mkString + val funder_name = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")) + .mkString for (line <- funder_doi.lines) { val json = template.replace("%s", line) @@ -42,7 +47,8 @@ class CrossrefMappingTest { def checkRelation(generatedOAF: List[Oaf]): Unit = { - val rels: List[Relation] = generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] + val rels: List[Relation] = + generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] assertFalse(rels.isEmpty) rels.foreach(relation => { val relJson = mapper.writeValueAsString(relation) @@ -58,22 +64,22 @@ class CrossrefMappingTest { } - @Test - def testSum() :Unit = { - val from:Long = 1613135645000L - val delta:Long = 1000000L - - - println(s"updating from value: $from -> ${from+delta}") + def testSum(): Unit = { + val from: Long = 1613135645000L + val delta: Long = 1000000L + println(s"updating from value: $from -> ${from + delta}") } @Test - def testOrcidID() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("orcid_data.json")).mkString - + def testOrcidID(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/orcid_data.json") + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -84,17 +90,18 @@ class CrossrefMappingTest { val items = resultList.filter(p => p.isInstanceOf[Result]) - mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) items.foreach(p => println(mapper.writeValueAsString(p))) - } @Test - def testEmptyTitle() :Unit = { - val json = 
Source.fromInputStream(getClass.getResourceAsStream("empty_title.json")).mkString - + def testEmptyTitle(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/empty_title.json") + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -105,17 +112,16 @@ class CrossrefMappingTest { val items = resultList.filter(p => p.isInstanceOf[Result]) - mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) items.foreach(p => println(mapper.writeValueAsString(p))) - } - @Test def testPeerReviewed(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("prwTest.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/prwTest.json")) + .mkString mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) assertNotNull(json) @@ -127,12 +133,8 @@ class CrossrefMappingTest { val items = resultList.filter(p => p.isInstanceOf[Result]) - items.foreach(p => logger.info(mapper.writeValueAsString(p))) - - - } def extractECAward(award: String): String = { @@ -142,21 +144,21 @@ class CrossrefMappingTest { null } - @Test def extractECTest(): Unit = { - val s = "FP7/2007-2013" + val s = "FP7/2007-2013" val awardExtracted = extractECAward(s) println(awardExtracted) println(DHPUtils.md5(awardExtracted)) - } @Test def testJournalRelation(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("awardTest.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/awardTest.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty) @@ -164,20 +166,19 @@ class CrossrefMappingTest { val resultList: List[Oaf] = Crossref2Oaf.convert(json) assertTrue(resultList.nonEmpty) - val rels:List[Relation] = resultList.filter(p => p.isInstanceOf[Relation]).map(r=> r.asInstanceOf[Relation]) - - + val rels: List[Relation] = + 
resultList.filter(p => p.isInstanceOf[Relation]).map(r => r.asInstanceOf[Relation]) rels.foreach(s => logger.info(s.getTarget)) - assertEquals(rels.size, 6 ) - + assertEquals(rels.size, 6) } - @Test def testConvertBookFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("book.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/book.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -198,42 +199,62 @@ class CrossrefMappingTest { assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); assertNotNull( result.getDataInfo.getProvenanceaction, - "DataInfo/Provenance test not null Failed"); + "DataInfo/Provenance test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassid.isEmpty, - "DataInfo/Provenance/classId test not null Failed"); + "DataInfo/Provenance/classId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassname.isEmpty, - "DataInfo/Provenance/className test not null Failed"); + "DataInfo/Provenance/className test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, - "DataInfo/Provenance/SchemeId test not null Failed"); + "DataInfo/Provenance/SchemeId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, - "DataInfo/Provenance/SchemeName test not null Failed"); + "DataInfo/Provenance/SchemeName test not null Failed" + ); assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); assertFalse(result.getCollectedfrom.isEmpty); val collectedFromList = result.getCollectedfrom.asScala - assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion") - - assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion") + assert( + 
collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), + "Wrong collected from assertion" + ) + assert( + collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), + "Wrong collected from assertion" + ) val relevantDates = result.getRelevantdate.asScala - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), "Missing relevant date of type published-online") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), "Missing relevant date of type published-print") + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), + "Missing relevant date of type created" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), + "Missing relevant date of type published-online" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), + "Missing relevant date of type published-print" + ) val rels = resultList.filter(p => p.isInstanceOf[Relation]) assert(rels.isEmpty) } - @Test def testConvertPreprintFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("preprint.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/preprint.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -254,44 +275,70 @@ class CrossrefMappingTest { assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); assertNotNull( result.getDataInfo.getProvenanceaction, - "DataInfo/Provenance test not null Failed"); + "DataInfo/Provenance test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassid.isEmpty, - "DataInfo/Provenance/classId test not 
null Failed"); + "DataInfo/Provenance/classId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassname.isEmpty, - "DataInfo/Provenance/className test not null Failed"); + "DataInfo/Provenance/className test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, - "DataInfo/Provenance/SchemeId test not null Failed"); + "DataInfo/Provenance/SchemeId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, - "DataInfo/Provenance/SchemeName test not null Failed"); + "DataInfo/Provenance/SchemeName test not null Failed" + ); assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); assertFalse(result.getCollectedfrom.isEmpty); val collectedFromList = result.getCollectedfrom.asScala - assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion") - - assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion") + assert( + collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), + "Wrong collected from assertion" + ) + assert( + collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), + "Wrong collected from assertion" + ) val relevantDates = result.getRelevantdate.asScala - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("available")), "Missing relevant date of type available") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("accepted")), "Missing relevant date of type accepted") - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), "Missing relevant date of type published-online") - 
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), "Missing relevant date of type published-print") + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), + "Missing relevant date of type created" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("available")), + "Missing relevant date of type available" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("accepted")), + "Missing relevant date of type accepted" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), + "Missing relevant date of type published-online" + ) + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), + "Missing relevant date of type published-print" + ) val rels = resultList.filter(p => p.isInstanceOf[Relation]) assert(rels.isEmpty) } - @Test def testConvertDatasetFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("dataset.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/dataset.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -312,19 +359,24 @@ class CrossrefMappingTest { assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); assertNotNull( result.getDataInfo.getProvenanceaction, - "DataInfo/Provenance test not null Failed"); + "DataInfo/Provenance test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassid.isEmpty, - "DataInfo/Provenance/classId test not null Failed"); + "DataInfo/Provenance/classId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassname.isEmpty, - "DataInfo/Provenance/className test not null Failed"); + "DataInfo/Provenance/className test not null Failed" + ); assertFalse( 
result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, - "DataInfo/Provenance/SchemeId test not null Failed"); + "DataInfo/Provenance/SchemeId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, - "DataInfo/Provenance/SchemeName test not null Failed"); + "DataInfo/Provenance/SchemeName test not null Failed" + ); assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); assertFalse(result.getCollectedfrom.isEmpty); @@ -332,7 +384,9 @@ class CrossrefMappingTest { @Test def testConvertArticleFromCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("article.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -353,32 +407,45 @@ class CrossrefMappingTest { assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); assertNotNull( result.getDataInfo.getProvenanceaction, - "DataInfo/Provenance test not null Failed"); + "DataInfo/Provenance test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassid.isEmpty, - "DataInfo/Provenance/classId test not null Failed"); + "DataInfo/Provenance/classId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getClassname.isEmpty, - "DataInfo/Provenance/className test not null Failed"); + "DataInfo/Provenance/className test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, - "DataInfo/Provenance/SchemeId test not null Failed"); + "DataInfo/Provenance/SchemeId test not null Failed" + ); assertFalse( result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, - "DataInfo/Provenance/SchemeName test not null Failed"); + "DataInfo/Provenance/SchemeName test not null Failed" + ); assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); 
assertFalse(result.getCollectedfrom.isEmpty); val collectedFromList = result.getCollectedfrom.asScala - assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion") - - assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion") + assert( + collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), + "Wrong collected from assertion" + ) + assert( + collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), + "Wrong collected from assertion" + ) val relevantDates = result.getRelevantdate.asScala - assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created") + assert( + relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), + "Missing relevant date of type created" + ) val rels = resultList.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] assertFalse(rels.isEmpty) @@ -392,15 +459,14 @@ class CrossrefMappingTest { }) - } - - @Test def testSetDateOfAcceptanceCrossRef2Oaf(): Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("dump_file.json")).mkString + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/dump_file.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -415,56 +481,18 @@ class CrossrefMappingTest { assert(items.size == 1) val result: Result = items.head.asInstanceOf[Publication] assertNotNull(result) - logger.info(mapper.writeValueAsString(result)); - -// assertNotNull(result.getDataInfo, "Datainfo test not null Failed"); -// assertNotNull( -// result.getDataInfo.getProvenanceaction, -// "DataInfo/Provenance test not null Failed"); -// assertFalse( -// result.getDataInfo.getProvenanceaction.getClassid.isEmpty, -// "DataInfo/Provenance/classId test not 
null Failed"); -// assertFalse( -// result.getDataInfo.getProvenanceaction.getClassname.isEmpty, -// "DataInfo/Provenance/className test not null Failed"); -// assertFalse( -// result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty, -// "DataInfo/Provenance/SchemeId test not null Failed"); -// assertFalse( -// result.getDataInfo.getProvenanceaction.getSchemename.isEmpty, -// "DataInfo/Provenance/SchemeName test not null Failed"); -// -// assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed"); -// assertFalse(result.getCollectedfrom.isEmpty); -// -// val collectedFromList = result.getCollectedfrom.asScala -// assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion") -// -// assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion") -// -// -// val relevantDates = result.getRelevantdate.asScala -// -// assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created") -// -// val rels = resultList.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] -// assertFalse(rels.isEmpty) -// rels.foreach(relation => { -// assertNotNull(relation) -// assertFalse(relation.getSource.isEmpty) -// assertFalse(relation.getTarget.isEmpty) -// assertFalse(relation.getRelClass.isEmpty) -// assertFalse(relation.getRelType.isEmpty) -// assertFalse(relation.getSubRelType.isEmpty) -// -// }) } @Test def testNormalizeDOI(): Unit = { - val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString - val line :String = "\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}]," + val template = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article_funder_template.json") + ) + .mkString + val line: String = + "\"funder\": 
[{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}]," val json = template.replace("%s", line) val resultList: List[Oaf] = Crossref2Oaf.convert(json) assertTrue(resultList.nonEmpty) @@ -473,13 +501,17 @@ class CrossrefMappingTest { result.getPid.asScala.foreach(pid => assertTrue(pid.getQualifier.getClassid.equals("doi"))) assertTrue(result.getPid.size() == 1) - result.getPid.asScala.foreach(pid => assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase()))) + result.getPid.asScala.foreach(pid => + assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase())) + ) } @Test def testNormalizeDOI2(): Unit = { - val template = Source.fromInputStream(getClass.getResourceAsStream("article.json")).mkString + val template = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json")) + .mkString val resultList: List[Oaf] = Crossref2Oaf.convert(template) assertTrue(resultList.nonEmpty) @@ -488,14 +520,19 @@ class CrossrefMappingTest { result.getPid.asScala.foreach(pid => assertTrue(pid.getQualifier.getClassid.equals("doi"))) assertTrue(result.getPid.size() == 1) - result.getPid.asScala.foreach(pid => assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase()))) + result.getPid.asScala.foreach(pid => + assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase())) + ) } @Test - def testLicenseVorClosed() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_vor.json")).mkString - + def testLicenseVorClosed(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_vor.json") + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -504,25 +541,28 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) - - val item : Result = resultList.filter(p => 
p.isInstanceOf[Result]).head.asInstanceOf[Result] + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED"))) + assertTrue( + item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor")) + ) + assertTrue( + item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED")) + ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) - - - } @Test - def testLicenseOpen() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_open.json")).mkString - + def testLicenseOpen(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_open.json") + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -531,21 +571,33 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html"))) + assertTrue( + item.getInstance().asScala exists (i => + i.getLicense.getValue.equals( + "http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html" + ) + ) + ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == 
OpenAccessRoute.hybrid)) + assertTrue( + item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid) + ) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) } @Test - def testLicenseEmbargoOpen() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_open.json")).mkString - + def testLicenseEmbargoOpen(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json" + ) + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -554,21 +606,33 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue( + item.getInstance().asScala exists (i => + i.getLicense.getValue.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) + ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)) + assertTrue( + item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid) + ) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) } @Test - def testLicenseEmbargo() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo.json")).mkString - + def testLicenseEmbargo(): 
Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/doiboost/crossref/publication_license_embargo.json" + ) + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -577,35 +641,18 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) - mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) - println(mapper.writeValueAsString(item)) - - } - - - @Test - def testLicenseEmbargoDateTime() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_datetime.json")).mkString - - - assertNotNull(json) - assertFalse(json.isEmpty); - - val resultList: List[Oaf] = Crossref2Oaf.convert(json) - - assertTrue(resultList.nonEmpty) - - - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - - assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) - assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) + assertTrue( + item.getInstance().asScala exists (i => + i.getLicense.getValue.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) + ) + assertTrue( + item.getInstance().asScala exists (i => 
i.getAccessright.getClassid.equals("EMBARGO")) + ) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) println(mapper.writeValueAsString(item)) @@ -613,9 +660,14 @@ class CrossrefMappingTest { } @Test - def testMultipleURLs() :Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("multiple_urls.json")).mkString - + def testLicenseEmbargoDateTime(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream( + "/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json" + ) + ) + .mkString assertNotNull(json) assertFalse(json.isEmpty); @@ -624,12 +676,47 @@ class CrossrefMappingTest { assertTrue(resultList.nonEmpty) + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] - val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + assertTrue( + item.getInstance().asScala exists (i => + i.getLicense.getValue.equals( + "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model" + ) + ) + ) + assertTrue( + item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO")) + ) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + @Test + def testMultipleURLs(): Unit = { + val json = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/multiple_urls.json") + ) + .mkString + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] assertEquals(1, 
item.getInstance().size()) assertEquals(1, item.getInstance().get(0).getUrl().size()) - assertEquals("https://doi.org/10.1016/j.jas.2019.105013", item.getInstance().get(0).getUrl().get(0)) + assertEquals( + "https://doi.org/10.1016/j.jas.2019.105013", + item.getInstance().get(0).getUrl().get(0) + ) //println(mapper.writeValueAsString(item)) } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala similarity index 65% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala index 46d4ec08d..882c0d8a0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/mag/MAGMappingTest.scala @@ -1,53 +1,46 @@ -package eu.dnetlib.doiboost.mag +package eu.dnetlib.dhp.doiboost.mag +import eu.dnetlib.doiboost.mag.{ConversionUtil, MagPapers, SparkProcessMAG} import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, SparkSession} import org.codehaus.jackson.map.ObjectMapper +import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test -import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} import java.sql.Timestamp import scala.io.Source - - class MAGMappingTest { val logger: Logger = LoggerFactory.getLogger(getClass) val mapper = new ObjectMapper() - - - @Test - def testSplitter():Unit = { + def testSplitter(): Unit = { val s = "sports.team" - if (s.contains(".")) { - println(s.split("\\.")head) + println(s.split("\\.") head) } } - - @Test - def testDate() :Unit = { + def testDate(): Unit = { - val p:Timestamp = Timestamp.valueOf("2011-10-02 00:00:00") + val p: Timestamp = Timestamp.valueOf("2011-10-02 00:00:00") - 
println(p.toString.substring(0,10)) + println(p.toString.substring(0, 10)) } - - @Test def buildInvertedIndexTest(): Unit = { - val json_input = Source.fromInputStream(getClass.getResourceAsStream("invertedIndex.json")).mkString + val json_input = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/mag/invertedIndex.json")) + .mkString val description = ConversionUtil.convertInvertedIndexString(json_input) assertNotNull(description) assertTrue(description.nonEmpty) @@ -55,10 +48,9 @@ class MAGMappingTest { logger.debug(description) } + @Test - def normalizeDoiTest():Unit = { - - + def normalizeDoiTest(): Unit = { implicit val formats = DefaultFormats @@ -71,14 +63,15 @@ class MAGMappingTest { .appName(getClass.getSimpleName) .config(conf) .getOrCreate() - val path = getClass.getResource("magPapers.json").getPath + val path = getClass.getResource("/eu/dnetlib/doiboost/mag/magPapers.json").getPath import org.apache.spark.sql.Encoders val schema = Encoders.product[MagPapers].schema import spark.implicits._ - val magPapers :Dataset[MagPapers] = spark.read.option("multiline",true).schema(schema).json(path).as[MagPapers] - val ret :Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) + val magPapers: Dataset[MagPapers] = + spark.read.option("multiline", true).schema(schema).json(path).as[MagPapers] + val ret: Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) assertTrue(ret.count == 10) ret.take(10).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase()))) @@ -86,7 +79,7 @@ class MAGMappingTest { } @Test - def normalizeDoiTest2():Unit = { + def normalizeDoiTest2(): Unit = { import org.json4s.DefaultFormats @@ -101,21 +94,19 @@ class MAGMappingTest { .appName(getClass.getSimpleName) .config(conf) .getOrCreate() - val path = getClass.getResource("duplicatedMagPapers.json").getPath + val path = getClass.getResource("/eu/dnetlib/doiboost/mag/duplicatedMagPapers.json").getPath import org.apache.spark.sql.Encoders val 
schema = Encoders.product[MagPapers].schema import spark.implicits._ - val magPapers :Dataset[MagPapers] = spark.read.option("multiline",true).schema(schema).json(path).as[MagPapers] - val ret :Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) + val magPapers: Dataset[MagPapers] = + spark.read.option("multiline", true).schema(schema).json(path).as[MagPapers] + val ret: Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) assertTrue(ret.count == 8) ret.take(8).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase()))) spark.close() //ret.take(8).foreach(mp => println(write(mp))) } - } - - diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala similarity index 65% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala index b484dc087..e5bf1bd5f 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -1,7 +1,8 @@ -package eu.dnetlib.doiboost.orcid +package eu.dnetlib.dhp.doiboost.orcid import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.oaf.Publication +import eu.dnetlib.doiboost.orcid._ import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.junit.jupiter.api.Assertions._ @@ -10,17 +11,18 @@ import org.junit.jupiter.api.io.TempDir import org.slf4j.{Logger, LoggerFactory} import java.nio.file.Path -import scala.io.Source - import scala.collection.JavaConversions._ +import scala.io.Source class MappingORCIDToOAFTest { val logger: Logger = 
LoggerFactory.getLogger(ORCIDToOAF.getClass) val mapper = new ObjectMapper() @Test - def testExtractData():Unit ={ - val json = Source.fromInputStream(getClass.getResourceAsStream("dataOutput")).mkString + def testExtractData(): Unit = { + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/orcid/dataOutput")) + .mkString assertNotNull(json) assertFalse(json.isEmpty) json.lines.foreach(s => { @@ -29,10 +31,10 @@ class MappingORCIDToOAFTest { } @Test - def testOAFConvert(@TempDir testDir: Path):Unit ={ - val sourcePath:String = getClass.getResource("/eu/dnetlib/doiboost/orcid/datasets").getPath - val targetPath: String =s"${testDir.toString}/output/orcidPublication" - val workingPath =s"${testDir.toString}/wp/" + def testOAFConvert(@TempDir testDir: Path): Unit = { + val sourcePath: String = getClass.getResource("/eu/dnetlib/doiboost/orcid/datasets").getPath + val targetPath: String = s"${testDir.toString}/output/orcidPublication" + val workingPath = s"${testDir.toString}/wp/" val conf = new SparkConf() conf.setMaster("local[*]") @@ -46,18 +48,14 @@ class MappingORCIDToOAFTest { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - SparkPreprocessORCID.run( spark,sourcePath, workingPath) + SparkPreprocessORCID.run(spark, sourcePath, workingPath) - SparkConvertORCIDToOAF.run(spark, workingPath,targetPath) + SparkConvertORCIDToOAF.run(spark, workingPath, targetPath) val mapper = new ObjectMapper() - - val oA = spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem].count() - - val p: Dataset[Publication] = spark.read.load(targetPath).as[Publication] assertTrue(oA == p.count()) @@ -65,19 +63,18 @@ class MappingORCIDToOAFTest { spark.close() - } - @Test - def testExtractDat1():Unit ={ + def testExtractDat1(): Unit = { + val aList: List[OrcidAuthor] = List( + OrcidAuthor("0000-0002-4335-5309", Some("Lucrecia"), Some("Curto"), null, null, null), + 
OrcidAuthor("0000-0001-7501-3330", Some("Emilio"), Some("Malchiodi"), null, null, null), + OrcidAuthor("0000-0002-5490-9186", Some("Sofia"), Some("Noli Truant"), null, null, null) + ) - - val aList: List[OrcidAuthor] = List(OrcidAuthor("0000-0002-4335-5309", Some("Lucrecia"), Some("Curto"), null, null, null ), - OrcidAuthor("0000-0001-7501-3330", Some("Emilio"), Some("Malchiodi"), null, null, null ), OrcidAuthor("0000-0002-5490-9186", Some("Sofia"), Some("Noli Truant"), null, null, null )) - - val orcid:ORCIDItem = ORCIDItem("10.1042/BCJ20160876", aList) + val orcid: ORCIDItem = ORCIDItem("10.1042/BCJ20160876", aList) val oaf = ORCIDToOAF.convertTOOAF(orcid) assert(oaf.getPid.size() == 1) @@ -85,10 +82,6 @@ class MappingORCIDToOAFTest { oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876"))) //println(mapper.writeValueAsString(ORCIDToOAF.convertTOOAF(orcid))) - } - - - } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala similarity index 60% rename from dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala rename to dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala index fa696fffc..542faa8ad 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala @@ -1,54 +1,56 @@ -package eu.dnetlib.doiboost.uw - +package eu.dnetlib.dhp.doiboost.uw import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.oaf.OpenAccessRoute +import eu.dnetlib.doiboost.uw.UnpayWallToOAF +import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test +import org.slf4j.{Logger, LoggerFactory} import scala.io.Source -import org.junit.jupiter.api.Assertions._ 
-import org.slf4j.{Logger, LoggerFactory} class UnpayWallMappingTest { val logger: Logger = LoggerFactory.getLogger(getClass) val mapper = new ObjectMapper() - @Test - def testMappingToOAF():Unit ={ + def testMappingToOAF(): Unit = { - val Ilist = Source.fromInputStream(getClass.getResourceAsStream("input.json")).mkString + val Ilist = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/uw/input.json")) + .mkString - var i:Int = 0 - for (line <-Ilist.lines) { + var i: Int = 0 + for (line <- Ilist.lines) { val p = UnpayWallToOAF.convertToOAF(line) - if(p!= null) { - assertTrue(p.getInstance().size()==1) - if (i== 0){ + if (p != null) { + assertTrue(p.getInstance().size() == 1) + if (i == 0) { assertTrue(p.getPid.get(0).getValue.equals("10.1038/2211089b0")) } - if (i== 1){ + if (i == 1) { assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00058.s001")) } - if (i== 2){ + if (i == 2) { assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00086.s001")) } logger.info(s"ID : ${p.getId}") } assertNotNull(line) assertTrue(line.nonEmpty) - i = i+1 + i = i + 1 } - - - val l = Ilist.lines.next() + val l = Ilist.lines.next() val item = UnpayWallToOAF.convertToOAF(l) - assertEquals(item.getInstance().get(0).getAccessright.getOpenAccessRoute, OpenAccessRoute.bronze) + assertEquals( + item.getInstance().get(0).getAccessright.getOpenAccessRoute, + OpenAccessRoute.bronze + ) logger.info(mapper.writeValueAsString(item)) diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 644ac2140..0b4269acd 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -48,6 +48,12 @@ io.github.classgraph classgraph + + eu.dnetlib.dhp + dhp-aggregation + 1.2.4-SNAPSHOT + compile +
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/KeyValueSet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/KeyValueSet.java new file mode 100644 index 000000000..57ab716b3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/KeyValueSet.java @@ -0,0 +1,26 @@ + +package eu.dnetlib.dhp; + +import java.io.Serializable; +import java.util.ArrayList; + +public class KeyValueSet implements Serializable { + private String key; + private ArrayList valueSet; + + public String getKey() { + return key; + } + + public void setKey(String key) { + this.key = key; + } + + public ArrayList getValueSet() { + return valueSet; + } + + public void setValueSet(ArrayList valueSet) { + this.valueSet = valueSet; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 392a5ab44..02fdcb09b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp; +import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -55,6 +56,9 @@ public class PropagationConstant { public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID = "result:organization:instrepo"; public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME = "Propagation of affiliation to result collected from datasources of type institutional repository"; + public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID = "result:organization:semrel"; + public static final String PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME = "Propagation of affiliation to result through sematic relations"; + public static final String 
PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID = "result:project:semrel"; public static final String PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME = "Propagation of result to project through semantic relation"; @@ -67,6 +71,13 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; + public static final String ITERATION_ONE = "ExitAtFirstIteration"; + public static final String ITERATION_TWO = "ExitAtSecondIteration"; + public static final String ITERATION_THREE = "ExitAtThirdIteration"; + public static final String ITERATION_FOUR = "ExitAtFourthIteration"; + public static final String ITERATION_FIVE = "ExitAtFifthIteration"; + public static final String ITERATION_NO_PARENT = "ExitAtNoFirstParentReached"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final String cfHbforResultQuery = "select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb " @@ -127,6 +138,39 @@ public class PropagationConstant { return pa; } + public static ArrayList getOrganizationRelationPair(String orgId, + String resultId, + String classID, + String className + + ) { + ArrayList newRelations = new ArrayList(); + newRelations + .add( + getRelation( + orgId, + resultId, + ModelConstants.IS_AUTHOR_INSTITUTION_OF, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, + PROPAGATION_DATA_INFO_TYPE, + classID, + className)); + newRelations + .add( + getRelation( + resultId, + orgId, + ModelConstants.HAS_AUTHOR_INSTITUTION, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, + PROPAGATION_DATA_INFO_TYPE, + classID, + className)); + + return newRelations; + } + public static Relation getRelation( String source, String target, diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index c8b1bc8fe..71a2b3525 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -95,13 +95,14 @@ public class ResultTagger implements Serializable { } - result - .getInstance() - .stream() - .map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey())) - .flatMap(p -> Stream.of(p.getFst(), p.getSnd())) - .map(s -> StringUtils.substringAfter(s, "|")) - .collect(Collectors.toCollection(HashSet::new)) +// result +// .getInstance() +// .stream() +// .map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey())) +// .flatMap(p -> Stream.of(p.getFst(), p.getSnd())) +// .map(s -> StringUtils.substringAfter(s, "|")) +// .collect(Collectors.toCollection(HashSet::new)) + tmp .forEach( dsId -> datasources .addAll( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 0ef5ca181..50ab997b6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.KeyValueSet; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Datasource; @@ -124,7 +125,7 @@ public 
class PrepareResultInstRepoAssociation { private static void prepareAlreadyLinkedAssociation( SparkSession spark, String alreadyLinkedPath) { - String query = "Select source resultId, collect_set(target) organizationSet " + String query = "Select source key, collect_set(target) valueSet " + "from relation " + "where datainfo.deletedbyinference = false " + "and lower(relClass) = '" @@ -134,7 +135,7 @@ public class PrepareResultInstRepoAssociation { spark .sql(query) - .as(Encoders.bean(ResultOrganizationSet.class)) + .as(Encoders.bean(KeyValueSet.class)) // TODO retry to stick with datasets .toJavaRDD() .map(r -> OBJECT_MAPPER.writeValueAsString(r)) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java deleted file mode 100644 index 3bce14cdb..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java +++ /dev/null @@ -1,26 +0,0 @@ - -package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; - -import java.io.Serializable; -import java.util.ArrayList; - -public class ResultOrganizationSet implements Serializable { - private String resultId; - private ArrayList organizationSet; - - public String getResultId() { - return resultId; - } - - public void setResultId(String resultId) { - this.resultId = resultId; - } - - public ArrayList getOrganizationSet() { - return organizationSet; - } - - public void setOrganizationSet(ArrayList organizationSet) { - this.organizationSet = organizationSet; - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 
63824f1a8..0757ebccd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -18,6 +18,7 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import eu.dnetlib.dhp.KeyValueSet; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -28,7 +29,7 @@ public class SparkResultToOrganizationFromIstRepoJob { private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromIstRepoJob.class); - private static final String RESULT_ORGANIZATIONSET_QUERY = "SELECT id resultId, collect_set(organizationId) organizationSet " + private static final String RESULT_ORGANIZATIONSET_QUERY = "SELECT id key, collect_set(organizationId) valueSet " + "FROM ( SELECT id, organizationId " + "FROM rels " + "JOIN cfhb " @@ -107,14 +108,14 @@ public class SparkResultToOrganizationFromIstRepoJob { Dataset dsOrg = readPath(spark, datasourceorganization, DatasourceOrganization.class); - Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, dsOrg); + Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, dsOrg); - Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultOrganizationSet.class); + Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, KeyValueSet.class); potentialUpdates .joinWith( alreadyLinked, - potentialUpdates.col("resultId").equalTo(alreadyLinked.col("resultId")), + potentialUpdates.col("key").equalTo(alreadyLinked.col("key")), "left_outer") .flatMap(createRelationFn(), Encoders.bean(Relation.class)) .write() @@ -123,49 +124,34 @@ public class SparkResultToOrganizationFromIstRepoJob { .json(outputPath); } - private static 
FlatMapFunction, Relation> createRelationFn() { + private static FlatMapFunction, Relation> createRelationFn() { return value -> { List newRelations = new ArrayList<>(); - ResultOrganizationSet potentialUpdate = value._1(); - Optional alreadyLinked = Optional.ofNullable(value._2()); - List organizations = potentialUpdate.getOrganizationSet(); + KeyValueSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + List organizations = potentialUpdate.getValueSet(); alreadyLinked .ifPresent( resOrg -> resOrg - .getOrganizationSet() + .getValueSet() .forEach(organizations::remove)); - String resultId = potentialUpdate.getResultId(); + String resultId = potentialUpdate.getKey(); organizations .forEach( - orgId -> { - newRelations - .add( - getRelation( - orgId, - resultId, - ModelConstants.IS_AUTHOR_INSTITUTION_OF, - ModelConstants.RESULT_ORGANIZATION, - ModelConstants.AFFILIATION, - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, - PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); - newRelations - .add( - getRelation( - resultId, - orgId, - ModelConstants.HAS_AUTHOR_INSTITUTION, - ModelConstants.RESULT_ORGANIZATION, - ModelConstants.AFFILIATION, - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, - PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); - }); + orgId -> newRelations + .addAll( + getOrganizationRelationPair( + orgId, + resultId, + PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, + PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)) + + ); return newRelations.iterator(); }; } - private static Dataset getPotentialRelations( + private static Dataset getPotentialRelations( SparkSession spark, String inputPath, Class resultClazz, @@ -179,7 +165,7 @@ public class SparkResultToOrganizationFromIstRepoJob { return spark .sql(RESULT_ORGANIZATIONSET_QUERY) - 
.as(Encoders.bean(ResultOrganizationSet.class)); + .as(Encoders.bean(KeyValueSet.class)); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java new file mode 100644 index 000000000..7984721e8 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/Leaves.java @@ -0,0 +1,16 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import java.io.Serializable; + +public class Leaves implements Serializable { + private String value; + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java new file mode 100644 index 000000000..23909fd9a --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfo.java @@ -0,0 +1,155 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +import java.io.Serializable; +import java.util.*; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.*; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import 
eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; + +/** + * Searches for all the association between result and organization already existing in the graph + * Creates also the parenthood hierarchy from the organizations + */ + +public class PrepareInfo implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(PrepareInfo.class); + + // associate orgs with all their parent + private static final String ORGANIZATION_ORGANIZATION_QUERY = "SELECT target key, collect_set(source) as valueSet " + + + "FROM relation " + + "WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() + + "' and datainfo.deletedbyinference = false " + + "GROUP BY target"; + + // associates results with all the orgs they are affiliated to + private static final String RESULT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " + + "FROM relation " + + "WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() + + "' and datainfo.deletedbyinference = false " + + "GROUP BY source"; + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkResultToOrganizationFromIstRepoJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String graphPath = parser.get("graphPath"); + log.info("graphPath: {}", graphPath); + + final String leavesPath = parser.get("leavesPath"); + log.info("leavesPath: {}", leavesPath); + + final String childParentPath = parser.get("childParentPath"); + log.info("childParentPath: {}", childParentPath); + + final String resultOrganizationPath = parser.get("resultOrgPath"); + 
log.info("resultOrganizationPath: {}", resultOrganizationPath); + + final String relationPath = parser.get("relationPath"); + log.info("relationPath: {}", relationPath); + + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); + + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> prepareInfo( + spark, + graphPath, + childParentPath, + leavesPath, + resultOrganizationPath, + relationPath)); + } + + private static void prepareInfo(SparkSession spark, String inputPath, String childParentOrganizationPath, + String currentIterationPath, String resultOrganizationPath, String relationPath) { + Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); + relation.createOrReplaceTempView("relation"); + + spark + .sql(ORGANIZATION_ORGANIZATION_QUERY) + .as(Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(childParentOrganizationPath); + + spark + .sql(RESULT_ORGANIZATION_QUERY) + .as(Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(resultOrganizationPath); + + relation + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && + r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(relationPath); + + Dataset children = spark + .sql( + "Select distinct target as child from relation where " + + "lower(relclass)='" + ModelConstants.IS_PARENT_OF.toLowerCase() + + "' and datainfo.deletedbyinference = false") + .as(Encoders.STRING()); + + Dataset parent = spark + .sql( + "Select distinct source as parent from relation " + + "where lower(relclass)='" + ModelConstants.IS_PARENT_OF.toLowerCase() + + "' and datainfo.deletedbyinference = false") + .as(Encoders.STRING()); + + // takes from the join the entities having only the left hand side: the leaves. 
Saves them + children + .joinWith(parent, children.col("child").equalTo(parent.col("parent")), "left") + .map((MapFunction, String>) value -> { + if (Optional.ofNullable(value._2()).isPresent()) { + return null; + } + + return value._1(); + }, Encoders.STRING()) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .json(currentIterationPath); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java new file mode 100644 index 000000000..788eff0e3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PropagationCounter.java @@ -0,0 +1,77 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import java.io.Serializable; + +import org.apache.spark.util.LongAccumulator; + +public class PropagationCounter implements Serializable { + private LongAccumulator iterationOne; + private LongAccumulator iterationTwo; + private LongAccumulator iterationThree; + private LongAccumulator iterationFour; + private LongAccumulator iterationFive; + private LongAccumulator notReachedFirstParent; + + public PropagationCounter() { + } + + public PropagationCounter(LongAccumulator iterationOne, LongAccumulator iterationTwo, + LongAccumulator iterationThree, LongAccumulator iterationFour, LongAccumulator iterationFive, + LongAccumulator notReachedFirstParent) { + this.iterationOne = iterationOne; + this.iterationTwo = iterationTwo; + this.iterationThree = iterationThree; + this.iterationFour = iterationFour; + this.iterationFive = iterationFive; + this.notReachedFirstParent = notReachedFirstParent; + } + + public LongAccumulator getIterationOne() { + return iterationOne; + } + + public void setIterationOne(LongAccumulator iterationOne) { + this.iterationOne = iterationOne; + } + + public LongAccumulator getIterationTwo() { 
+ return iterationTwo; + } + + public void setIterationTwo(LongAccumulator iterationTwo) { + this.iterationTwo = iterationTwo; + } + + public LongAccumulator getIterationThree() { + return iterationThree; + } + + public void setIterationThree(LongAccumulator iterationThree) { + this.iterationThree = iterationThree; + } + + public LongAccumulator getIterationFour() { + return iterationFour; + } + + public void setIterationFour(LongAccumulator iterationFour) { + this.iterationFour = iterationFour; + } + + public LongAccumulator getIterationFive() { + return iterationFive; + } + + public void setIterationFive(LongAccumulator iterationFive) { + this.iterationFive = iterationFive; + } + + public LongAccumulator getNotReachedFirstParent() { + return notReachedFirstParent; + } + + public void setNotReachedFirstParent(LongAccumulator notReachedFirstParent) { + this.notReachedFirstParent = notReachedFirstParent; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java new file mode 100644 index 000000000..cfc69a8f0 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java @@ -0,0 +1,234 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.Dataset; +import 
org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.util.LongAccumulator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class SparkResultToOrganizationFromSemRel implements Serializable { + private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); + private static final int MAX_ITERATION = 5; + public static final String NEW_RELATION_PATH = "/newRelation"; + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkResultToOrganizationFromIstRepoJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String relationPath = parser.get("relationPath"); + log.info("relationPath: {}", relationPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String leavesPath = parser.get("leavesPath"); + log.info("leavesPath: {}", leavesPath); + + final String childParentPath = parser.get("childParentPath"); + log.info("childParentPath: {}", childParentPath); + + final String resultOrganizationPath = parser.get("resultOrgPath"); + log.info("resultOrganizationPath: {}", resultOrganizationPath); + + final String workingPath = parser.get("workingDir"); + log.info("workingPath: {}", workingPath); + + final 
int iterations = Optional + .ofNullable(parser.get("iterations")) + .map(v -> { + if (Integer.valueOf(v) < MAX_ITERATION) { + return Integer.valueOf(v); + } else + return MAX_ITERATION; + }) + .orElse(MAX_ITERATION); + + log.info("iterations: {}", iterations); + + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); + + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> execPropagation( + spark, + leavesPath, + childParentPath, + resultOrganizationPath, + relationPath, + workingPath, + outputPath, + iterations)); + } + + public static void execPropagation(SparkSession spark, + String leavesPath, + String childParentPath, + String resultOrganizationPath, + String graphPath, + String workingPath, + String outputPath, + int iterations) { + if (iterations == 1) { + doPropagateOnce( + spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + workingPath, outputPath); + } else { + + final LongAccumulator iterationOne = spark.sparkContext().longAccumulator(ITERATION_ONE); + final LongAccumulator iterationTwo = spark.sparkContext().longAccumulator(ITERATION_TWO); + final LongAccumulator iterationThree = spark.sparkContext().longAccumulator(ITERATION_THREE); + final LongAccumulator iterationFour = spark.sparkContext().longAccumulator(ITERATION_FOUR); + final LongAccumulator iterationFive = spark.sparkContext().longAccumulator(ITERATION_FIVE); + final LongAccumulator notReachedFirstParent = spark.sparkContext().longAccumulator(ITERATION_NO_PARENT); + + final PropagationCounter propagationCounter = new PropagationCounter(iterationOne, + iterationTwo, + iterationThree, + iterationFour, + iterationFive, + notReachedFirstParent); + + doPropagate( + spark, leavesPath, childParentPath, resultOrganizationPath, graphPath, + workingPath, outputPath, propagationCounter); + } + + } + + private static void doPropagateOnce(SparkSession spark, String leavesPath, String childParentPath, + String 
resultOrganizationPath, String graphPath, String workingPath, + String outputPath) { + + StepActions + .execStep( + spark, graphPath, workingPath + NEW_RELATION_PATH, + leavesPath, childParentPath, resultOrganizationPath); + + addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath); + } + + private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath, + String resultOrganizationPath, String graphPath, String workingPath, String outputPath, + PropagationCounter propagationCounter) { + int iteration = 0; + long leavesCount; + + do { + iteration++; + StepActions + .execStep( + spark, graphPath, workingPath + NEW_RELATION_PATH, + leavesPath, childParentPath, resultOrganizationPath); + StepActions + .prepareForNextStep( + spark, workingPath + NEW_RELATION_PATH, resultOrganizationPath, leavesPath, + childParentPath, workingPath + "/leaves", workingPath + "/resOrg"); + moveOutput(spark, workingPath, leavesPath, resultOrganizationPath); + leavesCount = readPath(spark, leavesPath, Leaves.class).count(); + } while (leavesCount > 0 && iteration < MAX_ITERATION); + + if (leavesCount == 0) { + switch (String.valueOf(iteration)) { + case "1": + propagationCounter.getIterationOne().add(1); + break; + case "2": + propagationCounter.getIterationTwo().add(1); + break; + case "3": + propagationCounter.getIterationThree().add(1); + break; + case "4": + propagationCounter.getIterationFour().add(1); + break; + case "5": + propagationCounter.getIterationFive().add(1); + break; + default: + break; + } + } else { + propagationCounter.getNotReachedFirstParent().add(1); + } + + addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath); + } + + private static void moveOutput(SparkSession spark, String workingPath, String leavesPath, + String resultOrganizationPath) { + readPath(spark, workingPath + "/leaves", Leaves.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(leavesPath); + + readPath(spark, 
workingPath + "/resOrg", KeyValueSet.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(resultOrganizationPath); + + } + + private static void addNewRelations(SparkSession spark, String newRelationPath, String outputPath) { + Dataset relation = readPath(spark, newRelationPath, Relation.class); + + relation + .groupByKey((MapFunction) r -> r.getSource() + r.getTarget(), Encoders.STRING()) + .mapGroups( + (MapGroupsFunction) (k, it) -> it.next(), Encoders.bean(Relation.class)) + .flatMap( + (FlatMapFunction) r -> Arrays + .asList( + r, getRelation( + r.getTarget(), r.getSource(), ModelConstants.IS_AUTHOR_INSTITUTION_OF, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME)) + .iterator() + + , Encoders.bean(Relation.class)) + .write() + + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java new file mode 100644 index 000000000..1adbbe60e --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActions.java @@ -0,0 +1,204 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.PropagationConstant.readPath; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.spark.api.java.function.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; 
+import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; + +public class StepActions implements Serializable { + + public static void execStep(SparkSession spark, + String graphPath, String newRelationPath, + String leavesPath, String chldParentOrgPath, String resultOrgPath) { + + Dataset relationGraph = readPath(spark, graphPath, Relation.class); + // select only the relation source target among those proposed by propagation that are not already existent + getNewRels( + newRelationPath, relationGraph, + getPropagationRelation(spark, leavesPath, chldParentOrgPath, resultOrgPath)); + + } + + public static void prepareForNextStep(SparkSession spark, String selectedRelsPath, String resultOrgPath, + String leavesPath, String chldParentOrgPath, String leavesOutputPath, + String orgOutputPath) { + // use of the parents as new leaves set + changeLeavesSet(spark, leavesPath, chldParentOrgPath, leavesOutputPath); + + // add the new relations obtained from propagation to the keyvalueset result organization + updateResultOrganization( + spark, resultOrgPath, readPath(spark, selectedRelsPath, Relation.class), orgOutputPath); + } + + private static void updateResultOrganization(SparkSession spark, String resultOrgPath, + Dataset selectedRels, String outputPath) { + Dataset resultOrg = readPath(spark, resultOrgPath, KeyValueSet.class); + resultOrg + .joinWith( + selectedRels, resultOrg + .col("key") + .equalTo(selectedRels.col("source")), + "left") + .groupByKey((MapFunction, String>) mf -> mf._1().getKey(), Encoders.STRING()) + .mapGroups((MapGroupsFunction, KeyValueSet>) (key, it) -> { + Tuple2 first = it.next(); + if (!Optional.ofNullable(first._2()).isPresent()) { + return first._1(); + } + KeyValueSet ret = new KeyValueSet(); + ret.setKey(first._1().getKey()); + HashSet hs = new 
HashSet<>(); + hs.addAll(first._1().getValueSet()); + hs.add(first._2().getTarget()); + it.forEachRemaining(rel -> hs.add(rel._2().getTarget())); + ArrayList orgs = new ArrayList<>(); + orgs.addAll(hs); + ret.setValueSet(orgs); + return ret; + }, Encoders.bean(KeyValueSet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + } + + private static void changeLeavesSet(SparkSession spark, String leavesPath, String chldParentOrgPath, + String leavesOutputPath) { + Dataset childParent = readPath(spark, chldParentOrgPath, KeyValueSet.class); + Dataset leaves = readPath(spark, leavesPath, Leaves.class); + + childParent.createOrReplaceTempView("childParent"); + leaves.createOrReplaceTempView("leaves"); + + spark + .sql( + "SELECT distinct parent as value " + + "FROM leaves " + + "JOIN (SELECT key, parent " + + " FROM childParent " + + " LATERAL VIEW explode(valueSet) kv as parent) tmp " + + "ON value = key ") + .as(Encoders.bean(Leaves.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(leavesOutputPath); + } + + @NotNull + private static void getNewRels(String newRelationPath, Dataset relationDataset, + Dataset newRels) { + // selects new, not already existent relations + // union of new propagation relations to the relation set + // grouping from sourcetarget (we are sure the only relations are those from result to organization by + // construction of the set) + // if at least one relation in the set was not produced by propagation no new relation will be returned + + relationDataset + .union(newRels) + .groupByKey((MapFunction) r -> r.getSource() + r.getTarget(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { + + ArrayList relationList = new ArrayList<>(); + relationList.add(it.next()); + it.forEachRemaining(rel -> relationList.add(rel)); + + if (relationList + .stream() + .filter( + rel -> !rel + .getDataInfo() + .getProvenanceaction() + .getClassid() + 
.equals(PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID)) + .count() > 0) { + return null; + } + + return new ObjectMapper().writeValueAsString(relationList.get(0)); + + }, Encoders.STRING()) + .filter(Objects::nonNull) + .map( + (MapFunction) r -> new ObjectMapper().readValue(r, Relation.class), + Encoders.bean(Relation.class)) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(newRelationPath); + + } + + // get the possible relations from propagation + private static Dataset getPropagationRelation(SparkSession spark, + String leavesPath, + String chldParentOrgPath, + String resultOrgPath) { + + Dataset childParent = readPath(spark, chldParentOrgPath, KeyValueSet.class); + Dataset resultOrg = readPath(spark, resultOrgPath, KeyValueSet.class); + Dataset leaves = readPath(spark, leavesPath, Leaves.class); + + childParent.createOrReplaceTempView("childParent"); + resultOrg.createOrReplaceTempView("resultOrg"); + leaves.createOrReplaceTempView("leaves"); + + Dataset resultParent = spark + .sql( + "SELECT resId as key, " + + "collect_set(parent) valueSet " + + "FROM (SELECT key as child, parent " + + " FROM childParent " + + " LATERAL VIEW explode(valueSet) ks as parent) as cp " + + "JOIN leaves " + + "ON leaves.value = cp.child " + + "JOIN (" + + "SELECT key as resId, org " + + "FROM resultOrg " + + "LATERAL VIEW explode (valueSet) ks as org ) as ro " + + "ON leaves.value = ro.org " + + "GROUP BY resId") + .as(Encoders.bean(KeyValueSet.class)); + + // create new relations from result to organization for each result linked to a leaf + return resultParent + .flatMap( + (FlatMapFunction) v -> v + .getValueSet() + .stream() + .map( + orgId -> getRelation( + v.getKey(), + orgId, + ModelConstants.HAS_AUTHOR_INSTITUTION, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME)) 
+ .collect(Collectors.toList()) + .iterator(), + Encoders.bean(Relation.class)); + + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json new file mode 100644 index 000000000..c79bfe05d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_preparation_parameter.json @@ -0,0 +1,44 @@ +[ + { + "paramName":"gp", + "paramLongName":"graphPath", + "paramDescription": "the path of the graph whose relations are read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"lp", + "paramLongName":"leavesPath", + "paramDescription": "path where to store the leaves of the organization hierarchy (organizations that are not parent of any other organization)", + "paramRequired": false + }, + { + "paramName":"cp", + "paramLongName":"childParentPath", + "paramDescription": "path where to store/find the association between each organization and its parent organizations", + "paramRequired": true + }, + { + "paramName":"rp", + "paramLongName":"resultOrgPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "rep", + "paramLongName": "relationPath", + "paramDescription": "the path where to store the selected subset of relations", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json new file mode 100644 index 000000000..e09cd62fa --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/input_propagation_parameter.json @@ -0,0 +1,56 @@ +[ + { + "paramName":"rep", + "paramLongName":"relationPath", + "paramDescription": "the path of the result-organization relations selected in the preparation step", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"lp", + "paramLongName":"leavesPath", + "paramDescription": "path where to find (and update at each iteration) the current leaves of the organization hierarchy", + "paramRequired": false + }, + { + "paramName":"cp", + "paramLongName":"childParentPath", + "paramDescription": "path where to store/find the association between each organization and its parent organizations", + "paramRequired": true + }, + { + "paramName":"rp", + "paramLongName":"resultOrgPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "wd", + "paramLongName": "workingDir", + "paramDescription": "the path used to store temporary output files", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path where the new relations produced by the propagation are appended", + "paramRequired": true + }, + { + "paramName": "it", + "paramLongName": "iterations", + "paramDescription": "the number of iterations to be computed", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/config-default.xml 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/config-default.xml new file mode 100644 index 000000000..2744ea92b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml new file mode 100644 index 000000000..3f0530aaf --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,194 @@ + + + + sourcePath + the source path + + + outputPath + sets the outputPath + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${wf:conf('resumeFrom') eq 'PrepareInfo'} + + + + + + + + + + + + + + + + + + + + + + + + + + + ${nameNode}/${sourcePath}/relation + ${nameNode}/${outputPath}/relation + + + + + + + + 
${nameNode}/${sourcePath}/publication + ${nameNode}/${outputPath}/publication + + + + + + + + ${nameNode}/${sourcePath}/dataset + ${nameNode}/${outputPath}/dataset + + + + + + + + ${nameNode}/${sourcePath}/otherresearchproduct + ${nameNode}/${outputPath}/otherresearchproduct + + + + + + + + ${nameNode}/${sourcePath}/software + ${nameNode}/${outputPath}/software + + + + + + + + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + + + + + + yarn + cluster + PrepareResultOrganizationAssociation + eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --graphPath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --leavesPath${workingDir}/preparedInfo/leavesPath + --childParentPath${workingDir}/preparedInfo/childParentPath + --resultOrgPath${workingDir}/preparedInfo/resultOrgPath + --relationPath${workingDir}/preparedInfo/relation + + + + + + + + yarn + cluster + resultToOrganizationFromSemRel + eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --relationPath${workingDir}/preparedInfo/relation + --outputPath${outputPath}/relation + --leavesPath${workingDir}/preparedInfo/leavesPath + --childParentPath${workingDir}/preparedInfo/childParentPath + --resultOrgPath${workingDir}/preparedInfo/resultOrgPath + --hive_metastore_uris${hive_metastore_uris} + --workingDir${workingDir}/working + --iterations${iterations} + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/site/markdown/about.md b/dhp-workflows/dhp-enrichment/src/site/markdown/about.md new file mode 100644 index 000000000..c220c63b2 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/site/markdown/about.md @@ -0,0 +1 @@ +#DHP Enrichment \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/site/site.xml b/dhp-workflows/dhp-enrichment/src/site/site.xml new file mode 100644 index 000000000..dad0cd996 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/site/site.xml @@ -0,0 +1,26 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java new file mode 100644 index 000000000..2d2668db3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/PrepareInfoJobTest.java @@ -0,0 +1,579 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import static eu.dnetlib.dhp.PropagationConstant.readPath; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import 
java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob; +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class PrepareInfoJobTest { + + private static final Logger log = LoggerFactory.getLogger(PrepareInfoJobTest.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(PrepareInfoJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PrepareInfoJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareInfoJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + 
FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void childParentTest1() throws Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/childParentOrg/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); + + Assertions.assertEquals(6, verificationDs.count()); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertEquals( + "20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", + verificationDs + .filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .get(0)); + + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + 
.getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + verificationDs + .foreach((ForeachFunction) v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v))); + + } + + @Test + public void childParentTest2() throws Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + 
"-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/childParentOrg/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); + + Assertions.assertEquals(5, verificationDs.count()); + + Assertions + .assertEquals( + 0, verificationDs.filter("key = '20|dedup_wf_001::2899e571609779168222fdeb59cb916d'").count()); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertEquals( + "20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", + verificationDs + .filter("key = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .get(0)); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = 
'20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + } + + @Test + public void relationTest() throws Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + Assertions.assertEquals(7, verificationDs.count()); + + } + + @Test + public void resultOrganizationTest1() throws 
Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/resultOrganization/") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(KeyValueSet.class)); + + Assertions.assertEquals(5, verificationDs.count()); + + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 2, verificationDs + .filter("key = '50|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0")); + Assertions + .assertTrue( + 
verificationDs + .filter("key = '50|dedup_wf_001::2899e571609779168222fdeb59cb916d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '50|doajarticles::03748bcb5d754c951efec9700e18a56d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|doajarticles::03748bcb5d754c951efec9700e18a56d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '50|openaire____::ec653e804967133b9436fdd30d3ff51d'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|openaire____::ec653e804967133b9436fdd30d3ff51d'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + Assertions + .assertEquals( + 1, verificationDs + .filter("key = '50|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + verificationDs + .filter("key = '50|doajarticles::1cae0b82b56ccd97c2db1f698def7074'") + .collectAsList() + .get(0) + .getValueSet() + .contains("20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1")); + + verificationDs + .foreach((ForeachFunction) v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v))); + + } + + @Test + public void foundLeavesTest1() throws Exception { + + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + 
"-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/currentIteration/") + .map(item -> OBJECT_MAPPER.readValue(item, String.class)); + + Assertions.assertEquals(0, tmp.count()); + + } + + @Test + public void foundLeavesTest2() throws Exception { + PrepareInfo + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphPath", getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1") + .getPath(), + "-hive_metastore_uris", "", + "-leavesPath", workingDir.toString() + "/currentIteration/", + "-resultOrgPath", workingDir.toString() + "/resultOrganization/", + "-childParentPath", workingDir.toString() + "/childParentOrg/", + "-relationPath", workingDir.toString() + "/relation" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/currentIteration/") + .map(item -> OBJECT_MAPPER.readValue(item, Leaves.class)); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Leaves.class)); + + Assertions.assertEquals(3, verificationDs.count()); + + Assertions + .assertEquals( + 1, verificationDs + .filter("value = '20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0'") + .count()); + + Assertions + .assertEquals( + 1, verificationDs + .filter("value = '20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1'") + .count()); + + Assertions + .assertEquals( + 1, verificationDs + .filter("value = '20|pippo_wf_001::2899e571609779168222fdeb59cb916d'") + .count()); + + verificationDs.foreach((ForeachFunction) l -> System.out.println(OBJECT_MAPPER.writeValueAsString(l))); + + } + +} diff --git 
a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java new file mode 100644 index 000000000..7dd575b66 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/SparkJobTest.java @@ -0,0 +1,325 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; +import static eu.dnetlib.dhp.PropagationConstant.readPath; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class SparkJobTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(SparkJobTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(StepActionsTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PrepareInfoJobTest.class.getSimpleName()); + + 
conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareInfoJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void completeExecution() throws Exception { + + final String graphPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep") + .getPath(); + final String leavesPath = getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + .getPath(); + final String childParentPath = getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(); + + final String resultOrgPath = getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(); + + readPath(spark, leavesPath, Leaves.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/leavesInput"); + + readPath(spark, resultOrgPath, KeyValueSet.class) + .write() + .option("compression", "gzip") + .json(workingDir.toString() + "/orgsInput"); + + SparkResultToOrganizationFromSemRel + + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-relationPath", graphPath, + "-hive_metastore_uris", "", + "-outputPath", workingDir.toString() + "/finalrelation", + "-leavesPath", workingDir.toString() + "/leavesInput", + "-resultOrgPath", workingDir.toString() + "/orgsInput", + "-childParentPath", childParentPath, + "-workingDir", workingDir.toString() + }); + + final JavaSparkContext sc = 
JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/finalrelation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + + Assertions.assertEquals(18, tmp.count()); + tmp.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); + tmp.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); + tmp + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); + tmp + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + r.getDataInfo().getProvenanceaction().getClassid())); + tmp + .foreach( + r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, + r.getDataInfo().getProvenanceaction().getClassname())); + tmp + .foreach( + r -> Assertions + .assertEquals( + "0.85", + r.getDataInfo().getTrust())); + + Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); + tmp + .filter(r -> r.getSource().substring(0, 3).equals("50|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); + Assertions + .assertEquals( + 2, tmp.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 3, tmp.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, tmp.filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 1, tmp.filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + Assertions + .assertEquals( + 1, tmp.filter(r -> 
r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); + tmp + .filter(r -> r.getSource().substring(0, 3).equals("20|")) + .foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass())); + Assertions + .assertEquals( + 1, tmp.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + Assertions + .assertEquals( + 1, tmp.filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + Assertions + .assertEquals( + 2, tmp.filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + Assertions + .assertEquals( + 2, tmp.filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")).count()); + Assertions + .assertEquals( + 3, tmp.filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")).count()); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + tmp + 
.filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("20|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|openaire____::ec653e804967133b9436fdd30d3ff51d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .map(r -> r.getTarget()) + .collect() + 
.contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::03748bcb5d754c951efec9700e18a56d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .map(r -> r.getTarget()) + .collect() + .contains("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + + Assertions + .assertTrue( + tmp + .filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .map(r -> r.getTarget()) + .collect() + .contains("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java new file mode 100644 index 000000000..5c715f3b9 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfromsemrel/StepActionsTest.java @@ -0,0 +1,411 @@ + +package eu.dnetlib.dhp.resulttoorganizationfromsemrel; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import 
org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.KeyValueSet; +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class StepActionsTest { + + private static final Logger log = LoggerFactory.getLogger(StepActionsTest.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(StepActionsTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PrepareInfoJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareInfoJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void execStepTest() { + + StepActions + .execStep( + spark, getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/") + 
.getPath(), + workingDir.toString() + "/newRelationPath", + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + .getPath()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/newRelationPath") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Assertions.assertEquals(4, tmp.count()); + + Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals("propagation", r.getDataInfo().getInferenceprovenance())); + + verificationDs + .foreach((ForeachFunction) r -> Assertions.assertEquals("0.85", r.getDataInfo().getTrust())); + + verificationDs + .foreach((ForeachFunction) r -> Assertions.assertEquals("50|", r.getSource().substring(0, 3))); + + verificationDs + .foreach((ForeachFunction) r -> Assertions.assertEquals("20|", r.getTarget().substring(0, 3))); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals( + PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, + r.getDataInfo().getProvenanceaction().getClassid())); + + verificationDs + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals( + 
PropagationConstant.PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME, + r.getDataInfo().getProvenanceaction().getClassname())); + + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074", r.getTarget())); + + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .foreach( + (ForeachFunction) r -> Assertions + .assertEquals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", r.getTarget())); + + Assertions + .assertEquals( + 2, + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .count()); + + Assertions + .assertEquals( + 1, + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d") && + r.getTarget().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .count()); + + Assertions + .assertEquals( + 1, + verificationDs + .filter( + (FilterFunction) r -> r + .getSource() + .equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d") && + r.getTarget().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .count()); + + tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + } + + /** Calls StepActions.prepareForNextStep with the execstep test resources (relsforiteration1, resultOrganization, currentIteration, childParentOrg) and the output paths workingDir/tempLeaves and workingDir/tempOrgs, then reads workingDir/tempLeaves back as Leaves records. Expects exactly three records, one for each of the three organization ids asserted below. */ @Test + public void prepareForNextStepLeavesTest() { + + StepActions + .prepareForNextStep( + spark, + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + .getPath(), + getClass() + .getResource( + 
"/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(), + workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/tempLeaves") + .map(item -> OBJECT_MAPPER.readValue(item, Leaves.class)); + + Assertions.assertEquals(3, tmp.count()); + + Assertions + .assertEquals( + 1, tmp.filter(l -> l.getValue().equals("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")).count()); + + Assertions + .assertEquals( + 1, tmp.filter(l -> l.getValue().equals("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")).count()); + + Assertions + .assertEquals( + 1, tmp.filter(l -> l.getValue().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); + + } + + /** Calls StepActions.prepareForNextStep with the same execstep test resources and output paths, then reads workingDir/tempOrgs back as KeyValueSet records. Expects five records and, for each of the five result keys checked below, asserts the size of its value set and the organization ids it contains. NOTE(review): "Fon" in the method name looks like a typo for "For"; the name is left unchanged here. NOTE(review): the same filter/collect chain is repeated for every assertion; collecting once per key would presumably avoid re-evaluating the RDD each time - confirm before changing. */ @Test + public void prepareFonNextStepOrgTest() { + StepActions + .prepareForNextStep( + spark, + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/") + .getPath(), + getClass() + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/") + .getPath(), + workingDir.toString() + "/tempLeaves", workingDir.toString() + "/tempOrgs"); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/tempOrgs") + .map(item -> OBJECT_MAPPER.readValue(item, KeyValueSet.class)); + + Assertions.assertEquals(5, tmp.count()); + + Assertions + .assertEquals( + 1, tmp + .filter(kv -> kv.getKey().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + 
.assertEquals( + "20|doajarticles::1cae0b82b56ccd97c2db1f698def7074", + tmp + .filter(kv -> kv.getKey().equals("50|openaire____::ec653e804967133b9436fdd30d3ff51d")) + .collect() + .get(0) + .getValueSet() + .get(0)); + + Assertions + .assertEquals( + 1, tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + .assertEquals( + "20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f", + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::03748bcb5d754c951efec9700e18a56d")) + .collect() + .get(0) + .getValueSet() + .get(0)); + + Assertions + .assertEquals( + 4, tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .contains("20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|dedup_wf_001::2899e571609779168222fdeb59cb916d")) + .collect() + .get(0) + .getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + Assertions + .assertEquals( + 2, tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + tmp + .filter(kv -> 
kv.getKey().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .collect() + .get(0) + .getValueSet() + .contains("20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")) + .collect() + .get(0) + .getValueSet() + .contains("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")); + + Assertions + .assertEquals( + 3, tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .collect() + .get(0) + .getValueSet() + .size()); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .collect() + .get(0) + .getValueSet() + .contains("20|dedup_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .collect() + .get(0) + .getValueSet() + .contains("20|pippo_wf_001::2899e571609779168222fdeb59cb916d")); + Assertions + .assertTrue( + tmp + .filter(kv -> kv.getKey().equals("50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")) + .collect() + .get(0) + .getValueSet() + .contains("20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f")); + + } +} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1/relation new file mode 100644 index 000000000..c63a2e0ac --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest1/relation @@ -0,0 +1,7 @@ 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2/relation new file mode 100644 index 000000000..54589de32 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/childparenttest2/relation @@ -0,0 +1,7 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/childparent b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/childparent new file mode 100644 index 000000000..7d9ea588b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/childParentOrg/childparent @@ -0,0 +1,6 @@ +{"key":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1","valueSet":["20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"]} +{"key":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]} +{"key":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","valueSet":["20|openaire____::ec653e804967133b9436fdd30d3ff51d"]} +{"key":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0","valueSet":["20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]} +{"key":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"]} +{"key":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","valueSet":["20|doajarticles::03748bcb5d754c951efec9700e18a56d"]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/leaves b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/leaves new file mode 100644 index 000000000..3be9cae3b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/currentIteration/leaves @@ -0,0 +1,3 @@ +{"value":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} +{"value":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"value":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} \ No newline at 
end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation new file mode 100644 index 000000000..db7db8fdd --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relation @@ -0,0 +1,14 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isparentof","relType":"datasourceOrganization","source":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/relation new file mode 100644 index 000000000..32b816ef7 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/relsforiteration1/relation @@ -0,0 +1,4 @@ +{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.85","inferenceprovenance":"propagation","provenanceaction":{"classid":"result:organization:semrel","classname":"Propagation of affiliation to result through sematic relations","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultOrganization","subRelType":"affiliation","relClass":"hasAuthorInstitution","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","validated":false,"validationDate":null,"properties":[]} +{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.85","inferenceprovenance":"propagation","provenanceaction":{"classid":"result:organization:semrel","classname":"Propagation of affiliation to result through sematic relations","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultOrganization","subRelType":"affiliation","relClass":"hasAuthorInstitution","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","validated":false,"validationDate":null,"properties":[]} 
+{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.85","inferenceprovenance":"propagation","provenanceaction":{"classid":"result:organization:semrel","classname":"Propagation of affiliation to result through sematic relations","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultOrganization","subRelType":"affiliation","relClass":"hasAuthorInstitution","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d","validated":false,"validationDate":null,"properties":[]} +{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.85","inferenceprovenance":"propagation","provenanceaction":{"classid":"result:organization:semrel","classname":"Propagation of affiliation to result through sematic relations","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultOrganization","subRelType":"affiliation","relClass":"hasAuthorInstitution","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074","validated":false,"validationDate":null,"properties":[]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/resultorganization b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/resultorganization new file mode 100644 index 000000000..b4e227227 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/execstep/resultOrganization/resultorganization @@ -0,0 +1,5 @@ 
+{"key":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|dedup_wf_001::2899e571609779168222fdeb59cb916d"]} +{"key":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","valueSet":["20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"]} +{"key":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","valueSet":["20|pippo_wf_001::2899e571609779168222fdeb59cb916d","20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"]} +{"key":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","valueSet":["20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"]} +{"key":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","valueSet":["20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest/relation b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest/relation new file mode 100644 index 000000000..5aeabb71b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/resultorganizationtest/relation @@ -0,0 +1,7 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|pippo_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::03748bcb5d754c951efec9700e18a56d","subRelType":"provision","target":"20|doajarticles::2baa9032dc058d3c8ff780c426b0c19f"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"hasAuthorInstitution","relType":"datasourceOrganization","source":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java index d43d7ce28..2e2ea567a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java @@ -88,7 +88,7 @@ public class CleanGraphSparkJob { readTableFromPath(spark, inputPath, clazz) .map((MapFunction) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz)) .map((MapFunction) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz)) - .map((MapFunction) GraphCleaningFunctions::cleanup, Encoders.bean(clazz)) + .map((MapFunction) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz)) .filter((FilterFunction) GraphCleaningFunctions::filter) .write() .mode(SaveMode.Overwrite) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java index 5502fd391..102a1fa85 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java @@ -30,6 +30,11 @@ public class OafCleaner implements Serializable { } } else if (hasMapping(o, mapping)) { mapping.get(o.getClass()).accept(o); + for (final Field f : getAllFields(o.getClass())) { + f.setAccessible(true); + final Object val = f.get(o); + navigate(val, mapping); + } } else { for (final Field f : getAllFields(o.getClass())) { f.setAccessible(true); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 1beea5cb4..aad3a8706 100644 
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -12,13 +12,12 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.*; import eu.dnetlib.dhp.schema.dump.oaf.AccessRight; import eu.dnetlib.dhp.schema.dump.oaf.Author; -import eu.dnetlib.dhp.schema.dump.oaf.Country; import eu.dnetlib.dhp.schema.dump.oaf.GeoLocation; import eu.dnetlib.dhp.schema.dump.oaf.Instance; -import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; +import eu.dnetlib.dhp.schema.dump.oaf.Measure; import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; -import eu.dnetlib.dhp.schema.dump.oaf.Qualifier; import eu.dnetlib.dhp.schema.dump.oaf.Result; +import eu.dnetlib.dhp.schema.dump.oaf.community.CfHbKeyValue; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Context; @@ -56,7 +55,8 @@ public class ResultMapper implements Serializable { String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); out .setBestaccessright( - AccessRight + + BestAccessRight .newInstance( code, Constants.coarCodeLabelMap.get(code), @@ -81,7 +81,7 @@ public class ResultMapper implements Serializable { if (c.getClassid().equals((ModelConstants.UNKNOWN))) { return null; } - Country country = new Country(); + ResultCountry country = new ResultCountry(); country.setCode(c.getClassid()); country.setLabel(c.getClassname()); Optional @@ -124,7 +124,17 @@ public class ResultMapper implements Serializable { .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); out.setFormat(formatList); out.setId(input.getId()); - out.setOriginalId(input.getOriginalId()); + out.setOriginalId(new ArrayList<>()); + Optional + .ofNullable(input.getOriginalId()) + .ifPresent( + v -> out + .setOriginalId( + 
input + .getOriginalId() + .stream() + .filter(s -> !s.startsWith("50|")) + .collect(Collectors.toList()))); Optional> oInst = Optional .ofNullable(input.getInstance()); @@ -148,7 +158,7 @@ public class ResultMapper implements Serializable { Optional oL = Optional.ofNullable(input.getLanguage()); if (oL.isPresent()) { eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); - out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); + out.setLanguage(Language.newInstance(language.getClassid(), language.getClassname())); } Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); if (oLong.isPresent()) { @@ -184,7 +194,7 @@ public class ResultMapper implements Serializable { value .stream() .map( - p -> ControlledField + p -> ResultPid .newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); @@ -219,7 +229,7 @@ public class ResultMapper implements Serializable { input .getCollectedfrom() .stream() - .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) + .map(cf -> CfHbKeyValue.newInstance(cf.getKey(), cf.getValue())) .collect(Collectors.toList())); Set communities = communityMap.keySet(); @@ -412,12 +422,12 @@ public class ResultMapper implements Serializable { instance .setCollectedfrom( - KeyValue + CfHbKeyValue .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); instance .setHostedby( - KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); + CfHbKeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); return instance; @@ -436,6 +446,17 @@ public class ResultMapper implements Serializable { code, Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA)); + + Optional> mes = Optional.ofNullable(i.getMeasures()); + if (mes.isPresent()) { + List measure = new ArrayList<>(); + mes + .get() + .forEach( + m -> m.getUnit().forEach(u -> measure.add(Measure.newInstance(m.getId(), u.getValue())))); + 
instance.setMeasures(measure); + } + if (opAr.get().getOpenAccessRoute() != null) { switch (opAr.get().getOpenAccessRoute()) { case hybrid: @@ -463,7 +484,7 @@ public class ResultMapper implements Serializable { .setPid( pid .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> ResultPid.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); Optional @@ -473,7 +494,7 @@ public class ResultMapper implements Serializable { .setAlternateIdentifier( ai .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> AlternateIdentifier.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); Optional @@ -553,7 +574,7 @@ public class ResultMapper implements Serializable { private static Subject getSubject(StructuredProperty s) { Subject subject = new Subject(); - subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue())); + subject.setSubject(SubjectSchemeValue.newInstance(s.getQualifier().getClassid(), s.getValue())); Optional di = Optional.ofNullable(s.getDataInfo()); if (di.isPresent()) { Provenance p = new Provenance(); @@ -575,7 +596,7 @@ public class ResultMapper implements Serializable { Optional> oPids = Optional .ofNullable(oa.getPid()); if (oPids.isPresent()) { - Pid pid = getOrcid(oPids.get()); + AuthorPid pid = getOrcid(oPids.get()); if (pid != null) { a.setPid(pid); } @@ -584,12 +605,12 @@ public class ResultMapper implements Serializable { return a; } - private static Pid getAuthorPid(StructuredProperty pid) { + private static AuthorPid getAuthorPid(StructuredProperty pid) { Optional di = Optional.ofNullable(pid.getDataInfo()); if (di.isPresent()) { - return Pid + return AuthorPid .newInstance( - ControlledField + AuthorPidSchemeValue .newInstance( pid.getQualifier().getClassid(), pid.getValue()), @@ -598,9 +619,9 @@ public class ResultMapper implements 
Serializable { di.get().getProvenanceaction().getClassname(), di.get().getTrust())); } else { - return Pid + return AuthorPid .newInstance( - ControlledField + AuthorPidSchemeValue .newInstance( pid.getQualifier().getClassid(), pid.getValue()) @@ -609,7 +630,7 @@ public class ResultMapper implements Serializable { } } - private static Pid getOrcid(List p) { + private static AuthorPid getOrcid(List p) { List pidList = p.stream().map(pid -> { if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) || (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index 120de9327..c422c3b40 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -41,7 +41,7 @@ public class CreateContextEntities implements Serializable { .toString( CreateContextEntities.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index a468e334d..03f53de64 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -48,7 
+48,7 @@ public class CreateContextRelation implements Serializable { .requireNonNull( CreateContextRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json"))); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index 7f64db41c..c9de86613 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; -import static com.jayway.jsonpath.Filter.filter; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -11,9 +10,7 @@ import java.util.stream.Collectors; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; @@ -22,8 +19,6 @@ import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -133,14 +128,14 @@ public class DumpGraphEntities implements Serializable { .ifPresent( pids -> pids .stream() - .map(p -> 
ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList())); Optional .ofNullable(d.getDatasourcetype()) .ifPresent( dsType -> datasource - .setDatasourcetype(ControlledField.newInstance(dsType.getClassid(), dsType.getClassname()))); + .setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname()))); Optional .ofNullable(d.getOpenairecompatibility()) @@ -499,7 +494,7 @@ public class DumpGraphEntities implements Serializable { .ifPresent( value -> { if (!value.getClassid().equals(Constants.UNKNOWN)) { - organization.setCountry(Qualifier.newInstance(value.getClassid(), value.getClassname())); + organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname())); } }); @@ -515,7 +510,7 @@ public class DumpGraphEntities implements Serializable { .setPid( value .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); return organization; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java index 3e47ee664..cb4e42ac7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java @@ -89,7 +89,7 @@ public class Extractor implements Serializable { value.getId(), contextId, Constants.RESULT_ENTITY, Constants.CONTEXT_ENTITY, - ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, provenance); + ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP, provenance); if (!hashCodes.contains(r.hashCode())) { relationList .add(r); @@ 
-99,8 +99,8 @@ public class Extractor implements Serializable { contextId, value.getId(), Constants.CONTEXT_ENTITY, Constants.RESULT_ENTITY, - ModelConstants.RELATIONSHIP, - ModelConstants.IS_RELATED_TO, provenance); + ModelConstants.IS_RELATED_TO, + ModelConstants.RELATIONSHIP, provenance); if (!hashCodes.contains(r.hashCode())) { relationList .add( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java index 671bccd25..1c98ea6cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java @@ -31,7 +31,7 @@ public class SparkCollectAndSave implements Serializable { .toString( SparkCollectAndSave.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_collect_and_save.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java index 8b282386f..7cc05a821 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java @@ -22,7 +22,7 @@ public class SparkDumpEntitiesJob implements Serializable { .toString( SparkDumpEntitiesJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_parameters.json")); + 
"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java index ddfd6592f..0ac97dcb6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java @@ -4,10 +4,11 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.Optional; +import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -37,7 +38,7 @@ public class SparkDumpRelationJob implements Serializable { .toString( SparkDumpRelationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -54,6 +55,12 @@ public class SparkDumpRelationJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + Optional rs = Optional.ofNullable(parser.get("removeSet")); + final Set removeSet = new HashSet<>(); + if (rs.isPresent()) { + Collections.addAll(removeSet, rs.get().split(";")); + } + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -61,15 +68,16 @@ 
public class SparkDumpRelationJob implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - dumpRelation(spark, inputPath, outputPath); + dumpRelation(spark, inputPath, outputPath, removeSet); }); } - private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) { + private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set removeSet) { Dataset relations = Utils.readPath(spark, inputPath, Relation.class); relations + .filter((FilterFunction) r -> !removeSet.contains(r.getRelClass())) .map((MapFunction) relation -> { eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation(); relNew diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index f9d2123e2..dff1a1066 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -39,7 +39,7 @@ public class SparkOrganizationRelation implements Serializable { .toString( SparkOrganizationRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_organization_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java index 20f3fc4a7..1e5675e5f 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -35,7 +35,7 @@ public class SparkSelectValidRelationsJob implements Serializable { .toString( SparkSelectValidRelationsJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala deleted file mode 100644 index ce383292c..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ /dev/null @@ -1,142 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostedbymap - -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} -import org.apache.spark.sql.expressions.Aggregator - - -case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} -case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} - -object Aggregators { - - - - def getId(s1:String, s2:String) : String = { - if (s1.startsWith("10|")){ - return s1} - s2 - } - - def getValue(s1:String, s2:String) : String = { - if(!s1.equals("")){ - return s1 - } - s2 - } - - - def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { - val transformedData : Dataset[(String, HostedByItemType)] = df - 
.groupByKey(_._1)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator) - .map{ - case (id:String , res:(String, HostedByItemType)) => res - }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) - - transformedData - } - - val hostedByAggregator: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { - override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) - override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { - return merge(b, a) - } - override def merge(b1: (String, HostedByItemType), b2: (String, HostedByItemType)): (String, HostedByItemType) = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - if(b1._2.id.startsWith("10|")){ - return (b1._1, HostedByItemType(b1._2.id, b1._2.officialname, b1._2.issn, b1._2.eissn, b1._2.lissn, b1._2.openAccess || b2._2.openAccess)) - - } - return (b2._1, HostedByItemType(b2._2.id, b2._2.officialname, b2._2.issn, b2._2.eissn, b2._2.lissn, b1._2.openAccess || b2._2.openAccess)) - - } - override def finish(reduction: (String,HostedByItemType)): (String, HostedByItemType) = reduction - override def bufferEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) - - override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) - }.toColumn - - - - - def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ - override def zero: EntityInfo = EntityInfo.newInstance("","","") - - override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { - return merge(b, a) - } - override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } 
- if(!b1.getHostedById.equals("")){ - b1.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) - return b1 - } - b2.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) - b2 - - } - override def finish(reduction: EntityInfo): EntityInfo = reduction - override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - - override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - }.toColumn - - def resultToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { - val transformedData : Dataset[EntityInfo] = df - .groupByKey(_.getId)(Encoders.STRING) - .agg(Aggregators.resultToSingleIdAggregator) - .map{ - case (id:String , res: EntityInfo) => res - }(Encoders.bean(classOf[EntityInfo])) - - transformedData - } - - def datasourceToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ - override def zero: EntityInfo = EntityInfo.newInstance("","","") - - override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { - return merge(b, a) - } - override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - if(!b1.getHostedById.equals("")){ - return b1 - } - b2 - - } - override def finish(reduction: EntityInfo): EntityInfo = reduction - override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - - override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - }.toColumn - - - def datasourceToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { - val transformedData : Dataset[EntityInfo] = df - .groupByKey(_.getHostedById)(Encoders.STRING) - .agg(Aggregators.datasourceToSingleIdAggregator) - .map{ - case (id:String , res: EntityInfo) => res - }(Encoders.bean(classOf[EntityInfo])) - - transformedData - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala deleted file mode 100644 index 1ee1d5d1a..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ /dev/null @@ -1,222 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostedbymap - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} -import eu.dnetlib.dhp.schema.oaf.Datasource -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.json4s.DefaultFormats -import org.slf4j.{Logger, LoggerFactory} -import com.fasterxml.jackson.databind.ObjectMapper -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.FileSystem -import org.apache.hadoop.fs.Path -import java.io.PrintWriter - -import org.apache.hadoop.io.compress.GzipCodec - - -object SparkProduceHostedByMap { - - - implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - - - def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { - val openaire: HostedByInfo = input._1._1 - val doaj: HostedByInfo = input._1._2 - val gold: HostedByInfo = input._2 - val isOpenAccess: Boolean = doaj == null && gold == null - - openaire.journal_id match { - case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) - case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) - case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) - - // catch the default with a variable so you can print it - case whoa => null - } - } - - def 
toHostedByMap(input: (String, HostedByItemType)): String = { - import org.json4s.jackson.Serialization - - implicit val formats = org.json4s.DefaultFormats - - val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) - - Serialization.write(map) - - - } - - - - def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { - if(issn != null){ - if(eissn != null){ - if(issnl != null){ - HostedByItemType(id, officialname, issn, eissn, issnl , oa) - }else{ - HostedByItemType(id, officialname, issn, eissn, "" , oa) - } - }else{ - if(issnl != null){ - HostedByItemType(id, officialname, issn, "", issnl , oa) - }else{ - HostedByItemType(id, officialname, issn, "", "" , oa) - } - } - }else{ - if(eissn != null){ - if(issnl != null){ - HostedByItemType(id, officialname, "", eissn, issnl , oa) - }else{ - HostedByItemType(id, officialname, "", eissn, "" , oa) - } - }else{ - if(issnl != null){ - HostedByItemType(id, officialname, "", "", issnl , oa) - }else{ - HostedByItemType("", "", "", "", "" , oa) - } - } - } - } - - def oaToHostedbyItemType(dats: Datasource): HostedByItemType = { - if (dats.getJournal != null) { - - return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false) - } - HostedByItemType("","","","","",false) - } - - def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = { - - import spark.implicits._ - - - val mapper = new ObjectMapper() - - implicit var encoderD = Encoders.kryo[Datasource] - - val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) - .map(r => mapper.readValue(r, classOf[Datasource])) - - dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) - - } - - - def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = { - return getHostedByItemType(Constants.UNIBI, 
gold.getTitle, gold.getIssn, "", gold.getIssnL, true) - } - - - def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { - import spark.implicits._ - - implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] - - val mapper = new ObjectMapper() - - val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) - .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) - - dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) - - } - - def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = { - - return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) - } - - def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { - import spark.implicits._ - - implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] - - val mapper = new ObjectMapper() - - val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath) - .map(r => mapper.readValue(r, classOf[DOAJModel])) - - dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) - - } - - def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { - var lst : List[(String, HostedByItemType)] = List() - if(!input.issn.equals("")){ - lst = (input.issn, input) :: lst - } - if(!input.eissn.equals("")){ - lst = (input.eissn, input) :: lst - } - if(!input.lissn.equals("")){ - lst = (input.lissn, input) :: lst - } - lst - } - - - - def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { - val conf = new Configuration() - - conf.set("fs.defaultFS", hdfsNameNode) - val fs= FileSystem.get(conf) - val output = fs.create(new Path(outputPath)) - val writer = new PrintWriter(output) - try { - input.foreach(hbi => writer.println(hbi)) - } - finally { - writer.close() - - } - - } - - - - def main(args: Array[String]): Unit = { - - val logger: Logger = 
LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - val datasourcePath = parser.get("datasourcePath") - val workingDirPath = parser.get("workingPath") - val outputPath = parser.get("outputPath") - - - implicit val formats = DefaultFormats - - - logger.info("Getting the Datasources") - - - Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) - .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json")) - .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) - .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) - .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) - - - } - - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 526f45f6e..3e8ca1763 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -347,6 +347,10 @@ public abstract class AbstractMdRecordToOafMapper { r.setCoverage(prepareCoverages(doc, info)); r.setContext(prepareContexts(doc, info)); r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES + r + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); + r + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); 
r.setInstance(instances); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala deleted file mode 100644 index c7ad1890d..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ /dev/null @@ -1,74 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.raw - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.common.ModelSupport -import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo -import eu.dnetlib.dhp.schema.oaf.Oaf -import eu.dnetlib.dhp.utils.DHPUtils -import org.apache.commons.io.IOUtils -import org.apache.commons.lang3.StringUtils -import org.apache.http.client.methods.HttpGet -import org.apache.http.impl.client.HttpClients -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} -import org.apache.spark.{SparkConf, SparkContext} -import org.slf4j.LoggerFactory - -import scala.collection.JavaConverters._ -import scala.io.Source - -object CopyHdfsOafSparkApplication { - - def main(args: Array[String]): Unit = { - val log = LoggerFactory.getLogger(getClass) - val conf = new SparkConf() - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json")).mkString) - parser.parseArgument(args) - - val spark = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - val sc: SparkContext = spark.sparkContext - - val mdstoreManagerUrl = parser.get("mdstoreManagerUrl") - log.info("mdstoreManagerUrl: {}", mdstoreManagerUrl) - - val mdFormat = parser.get("mdFormat") - 
log.info("mdFormat: {}", mdFormat) - - val mdLayout = parser.get("mdLayout") - log.info("mdLayout: {}", mdLayout) - - val mdInterpretation = parser.get("mdInterpretation") - log.info("mdInterpretation: {}", mdInterpretation) - - val hdfsPath = parser.get("hdfsPath") - log.info("hdfsPath: {}", hdfsPath) - - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - - val paths = DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala - - val validPaths: List[String] = paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList - - if (validPaths.nonEmpty) { - val oaf = spark.read.load(validPaths: _*).as[Oaf] - val mapper = new ObjectMapper() - val l =ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList - l.foreach( - e => - oaf.filter(o => o.getClass.getSimpleName.equalsIgnoreCase(e)) - .map(s => mapper.writeValueAsString(s))(Encoders.STRING) - .write - .option("compression", "gzip") - .mode(SaveMode.Append) - .text(s"$hdfsPath/${e}") - ) - } - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 194715295..639c1ab30 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; @@ -11,8 +12,11 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; +import org.dom4j.Element; import org.dom4j.Node; +import 
com.google.common.collect.Lists; + import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.*; @@ -34,10 +38,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareTitles(final Document doc, final DataInfo info) { - return prepareListStructProps( - doc, - "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']", - MAIN_TITLE_QUALIFIER, info); + + final List title = Lists.newArrayList(); + final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']"; + + for (Object o : doc.selectNodes(xpath)) { + Element e = (Element) o; + final String titleValue = e.getTextTrim(); + final String titleType = e.attributeValue("titleType"); + if (StringUtils.isNotBlank(titleType)) { + title + .add( + structuredProperty( + titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info)); + } else { + title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info)); + } + } + + return title; } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala deleted file mode 100644 index 316b8afed..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ /dev/null @@ -1,107 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.resolution - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.common.EntityType -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} -import org.apache.commons.io.IOUtils -import 
org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.spark.SparkConf -import org.apache.spark.sql._ -import org.slf4j.{Logger, LoggerFactory} - -object SparkResolveEntities { - - val mapper = new ObjectMapper() - val entities = List(EntityType.dataset,EntityType.publication, EntityType.software, EntityType.otherresearchproduct) - - def main(args: Array[String]): Unit = { - val log: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - val graphBasePath = parser.get("graphBasePath") - log.info(s"graphBasePath -> $graphBasePath") - val workingPath = parser.get("workingPath") - log.info(s"workingPath -> $workingPath") - val unresolvedPath = parser.get("unresolvedPath") - log.info(s"unresolvedPath -> $unresolvedPath") - - val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) - fs.mkdirs(new Path(workingPath)) - - resolveEntities(spark, workingPath, unresolvedPath) - generateResolvedEntities(spark, workingPath, graphBasePath) - - // TO BE conservative we keep the original entities in the working dir - // and save the resolved entities on the graphBasePath - //In future these lines of code should be removed - entities.foreach { - e => - fs.rename(new Path(s"$graphBasePath/$e"), new Path(s"$workingPath/${e}_old")) - fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e")) - } - -} - - -def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { - implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - import spark.implicits._ - - val rPid: Dataset[(String, String)] = 
spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] - val up: Dataset[(String, Result)] = spark.read.text(unresolvedPath).as[String].map(s => mapper.readValue(s, classOf[Result])).map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) - - rPid.joinWith(up, rPid("_2").equalTo(up("_1")), "inner").map { - r => - val result = r._2._2 - val dnetId = r._1._1 - result.setId(dnetId) - result - }.write.mode(SaveMode.Overwrite).save(s"$workingPath/resolvedEntities") - } - - - def deserializeObject(input:String, entity:EntityType ) :Result = { - - entity match { - case EntityType.publication => mapper.readValue(input, classOf[Publication]) - case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) - case EntityType.software=> mapper.readValue(input, classOf[Software]) - case EntityType.otherresearchproduct=> mapper.readValue(input, classOf[OtherResearchProduct]) - } - } - - def generateResolvedEntities(spark:SparkSession, workingPath: String, graphBasePath:String) = { - - implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - import spark.implicits._ - - val re:Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] - entities.foreach { - e => - - spark.read.text(s"$graphBasePath/$e").as[String] - .map(s => deserializeObject(s, e)) - .union(re) - .groupByKey(_.getId) - .reduceGroups { - (x, y) => - x.mergeFrom(y) - x - }.map(_._2) - .filter(r => r.getClass.getSimpleName.toLowerCase != "result") - .map(r => mapper.writeValueAsString(r))(Encoders.STRING) - .write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$workingPath/resolvedGraph/$e") - } - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala deleted file mode 100644 index 4b82fe645..000000000 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ /dev/null @@ -1,72 +0,0 @@ -package eu.dnetlib.dhp.sx.graph - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.{Logger, LoggerFactory} -object SparkConvertRDDtoDataset { - - def main(args: Array[String]): Unit = { - - - val log: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - val sourcePath = parser.get("sourcePath") - log.info(s"sourcePath -> $sourcePath") - val t = parser.get("targetPath") - log.info(s"targetPath -> $t") - - val entityPath = s"$t/entities" - val relPath = s"$t/relation" - val mapper = new ObjectMapper() - implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) - implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) - implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) - implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) - - - log.info("Converting dataset") - val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) - 
spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") - - - log.info("Converting publication") - val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) - spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") - - log.info("Converting software") - val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) - spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") - - log.info("Converting otherresearchproduct") - val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) - spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") - - - log.info("Converting Relation") - - - val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin") - - val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation") - .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) - .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) - spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") - - - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala deleted file mode 100644 index 350b00c5e..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ /dev/null @@ -1,100 +0,0 @@ -package eu.dnetlib.dhp.sx.graph - -import 
eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - - - -object SparkCreateInputGraph { - - def main(args: Array[String]): Unit = { - - val log: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/extract_entities_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - val resultObject = List( - ("publication", classOf[Publication]), - ("dataset", classOf[OafDataset]), - ("software", classOf[Software]), - ("otherResearchProduct", classOf[OtherResearchProduct]) - - ) - - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) - implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) - implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) - implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) - implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - - - - - - val sourcePath = parser.get("sourcePath") - log.info(s"sourcePath -> $sourcePath") - val targetPath = parser.get("targetPath") - log.info(s"targetPath -> $targetPath") - - - val oafDs:Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] - - - log.info("Extract Publication") - oafDs.filter(o => o.isInstanceOf[Publication]).map(p => 
p.asInstanceOf[Publication]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/publication") - - log.info("Extract dataset") - oafDs.filter(o => o.isInstanceOf[OafDataset]).map(p => p.asInstanceOf[OafDataset]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/dataset") - - log.info("Extract software") - oafDs.filter(o => o.isInstanceOf[Software]).map(p => p.asInstanceOf[Software]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/software") - - log.info("Extract otherResearchProduct") - oafDs.filter(o => o.isInstanceOf[OtherResearchProduct]).map(p => p.asInstanceOf[OtherResearchProduct]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/otherResearchProduct") - - log.info("Extract Relation") - oafDs.filter(o => o.isInstanceOf[Relation]).map(p => p.asInstanceOf[Relation]).write.mode(SaveMode.Overwrite).save(s"$targetPath/extracted/relation") - - resultObject.foreach { r => - log.info(s"Make ${r._1} unique") - makeDatasetUnique(s"$targetPath/extracted/${r._1}",s"$targetPath/preprocess/${r._1}",spark, r._2) - } - } - - - def extractEntities[T <: Oaf ](oafDs:Dataset[Oaf], targetPath:String, clazz:Class[T], log:Logger) :Unit = { - - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) - log.info(s"Extract ${clazz.getSimpleName}") - oafDs.filter(o => o.isInstanceOf[T]).map(p => p.asInstanceOf[T]).write.mode(SaveMode.Overwrite).save(targetPath) - } - - - def makeDatasetUnique[T <: Result ](sourcePath:String, targetPath:String, spark:SparkSession, clazz:Class[T]) :Unit = { - import spark.implicits._ - - implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz) - - val ds:Dataset[T] = spark.read.load(sourcePath).as[T] - - ds.groupByKey(_.getId).reduceGroups{(x,y) => - x.mergeFrom(y) - x - }.map(_._2).write.mode(SaveMode.Overwrite).save(targetPath) - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala deleted file mode 100644 index e4fcd2782..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ /dev/null @@ -1,111 +0,0 @@ -package eu.dnetlib.dhp.sx.graph - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Relation -import eu.dnetlib.dhp.schema.sx.scholix.Scholix -import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary -import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils -import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils.RelatedEntities -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.sql.functions.count -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - -object SparkCreateScholix { - - def main(args: Array[String]): Unit = { - val log: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/create_scholix_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - val relationPath = parser.get("relationPath") - log.info(s"relationPath -> $relationPath") - val summaryPath = parser.get("summaryPath") - log.info(s"summaryPath -> $summaryPath") - val targetPath = parser.get("targetPath") - log.info(s"targetPath -> $targetPath") - - - implicit val relEncoder: Encoder[Relation] = Encoders.kryo[Relation] - implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] - - import spark.implicits._ - - - val relationDS: Dataset[(String, Relation)] = 
spark.read.load(relationPath).as[Relation] - .filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - - val summaryDS: Dataset[(String, ScholixSummary)] = spark.read.load(summaryPath).as[ScholixSummary] - .map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, summaryEncoder)) - - - relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") - .map { input: ((String, Relation), (String, ScholixSummary)) => - if (input._1!= null && input._2!= null) { - val rel: Relation = input._1._2 - val source: ScholixSummary = input._2._2 - (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) - } - else null - }(Encoders.tuple(Encoders.STRING, scholixEncoder)) - .filter(r => r!= null) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_from_source") - - val scholixSource: Dataset[(String, Scholix)] = spark.read.load(s"$targetPath/scholix_from_source").as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) - - scholixSource.joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left") - .map { input: ((String, Scholix), (String, ScholixSummary)) => - if (input._2== null) { - null - } else { - val s: Scholix = input._1._2 - val target: ScholixSummary = input._2._2 - ScholixUtils.generateCompleteScholix(s, target) - } - }.filter(s => s!= null).write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_one_verse") - - - val scholix_o_v: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix_one_verse").as[Scholix] - - scholix_o_v.flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s))).as[Scholix] - .map(s=> (s.getIdentifier,s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) - .groupByKey(_._1) - .agg(ScholixUtils.scholixAggregator.toColumn) - .map(s => s._2) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix") - - val 
scholix_final:Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] - - val stats:Dataset[(String,String,Long)]= scholix_final.map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)).groupBy("_1", "_2").agg(count("_1")).as[(String,String,Long)] - - - stats - .map(s => RelatedEntities(s._1, if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, if ("publication".equalsIgnoreCase(s._2)) s._3 else 0 )) - .groupByKey(_.id) - .reduceGroups((a, b) => RelatedEntities(a.id, a.relatedDataset+b.relatedDataset, a.relatedPublication+b.relatedPublication)) - .map(_._2) - .write.mode(SaveMode.Overwrite).save(s"$targetPath/related_entities") - - val relatedEntitiesDS:Dataset[RelatedEntities] = spark.read.load(s"$targetPath/related_entities").as[RelatedEntities].filter(r => r.relatedPublication>0 || r.relatedDataset > 0) - - relatedEntitiesDS.joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner").map{i => - val re = i._1 - val sum = i._2._2 - - sum.setRelatedDatasets(re.relatedDataset) - sum.setRelatedPublications(re.relatedPublication) - sum - }.write.mode(SaveMode.Overwrite).save(s"${summaryPath}_filtered") - - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala deleted file mode 100644 index 193512474..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala +++ /dev/null @@ -1,111 +0,0 @@ -package eu.dnetlib.dhp.sx.graph.pangaea - -import org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.{Encoder, Encoders} -import org.json4s -import org.json4s.DefaultFormats -import org.json4s.jackson.JsonMethods.parse -import java.util.regex.Pattern -import scala.language.postfixOps -import scala.xml.{Elem, Node, XML} - -case class PangaeaDataModel(identifier:String, title:List[String], 
objectType:List[String], creator:List[String], - publisher:List[String], dataCenter :List[String],subject :List[String], language:String, - rights:String, parent:String,relation :List[String],linkage:List[(String,String)] ) {} - -object PangaeaUtils { - - - def toDataset(input:String):PangaeaDataModel = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: json4s.JValue = parse(input) - val xml= (json \ "xml").extract[String] - parseXml(xml) - } - - def findDOIInRelation( input:List[String]):List[String] = { - val pattern = Pattern.compile("\\b(10[.][0-9]{4,}(?:[.][0-9]+)*\\/(?:(?![\"&\\'<>])\\S)+)\\b") - input.map(i => { - val matcher = pattern.matcher(i) - if (matcher.find()) - matcher.group(0) - else - null - }).filter(i => i!= null) - } - - def attributeOpt(attribute: String, node:Node): Option[String] = - node.attribute(attribute) flatMap (_.headOption) map (_.text) - - def extractLinkage(node:Elem):List[(String, String)] = { - (node \ "linkage").map(n =>(attributeOpt("type",n), n.text)).filter(t => t._1.isDefined).map(t=> (t._1.get, t._2))(collection.breakOut) - } - - def parseXml(input:String):PangaeaDataModel = { - val xml = XML.loadString(input) - - val identifier = (xml \ "identifier").text - val title :List[String] = (xml \ "title").map(n => n.text)(collection.breakOut) - val pType :List[String] = (xml \ "type").map(n => n.text)(collection.breakOut) - val creators:List[String] = (xml \ "creator").map(n => n.text)(collection.breakOut) - val publisher :List[String] = (xml \ "publisher").map(n => n.text)(collection.breakOut) - val dataCenter :List[String] = (xml \ "dataCenter").map(n => n.text)(collection.breakOut) - val subject :List[String] = (xml \ "subject").map(n => n.text)(collection.breakOut) - val language= (xml \ "language").text - val rights= (xml \ "rights").text - val parentIdentifier= (xml \ "parentIdentifier").text - val relation :List[String] = (xml \ "relation").map(n => n.text)(collection.breakOut) 
- val relationFiltered = findDOIInRelation(relation) - val linkage:List[(String,String)] = extractLinkage(xml) - - PangaeaDataModel(identifier,title, pType, creators,publisher, dataCenter, subject, language, rights, parentIdentifier, relationFiltered, linkage) - } - - - def getDatasetAggregator(): Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] = new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel]{ - - - override def zero: PangaeaDataModel = null - - override def reduce(b: PangaeaDataModel, a: (String, PangaeaDataModel)): PangaeaDataModel = { - if (b == null) - a._2 - else { - if (a == null) - b - else { - if (b.title != null && b.title.nonEmpty) - b - else - a._2 - - } - } - } - - override def merge(b1: PangaeaDataModel, b2: PangaeaDataModel): PangaeaDataModel = { - if (b1 == null) - b2 - else { - if (b2 == null) - b1 - else { - if (b1.title != null && b1.title.nonEmpty) - b1 - else - b2 - - } - } - } - override def finish(reduction: PangaeaDataModel): PangaeaDataModel = reduction - - override def bufferEncoder: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] - - override def outputEncoder: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] - } - - - - -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala deleted file mode 100644 index 93c554e04..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ /dev/null @@ -1,360 +0,0 @@ -package eu.dnetlib.dhp.sx.graph.scholix - - -import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty} -import eu.dnetlib.dhp.schema.sx.scholix._ -import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology} -import eu.dnetlib.dhp.utils.DHPUtils -import 
org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.{Encoder, Encoders} -import org.json4s -import org.json4s.DefaultFormats -import org.json4s.jackson.JsonMethods.parse - -import scala.collection.JavaConverters._ -import scala.io.Source -import scala.language.postfixOps - -object ScholixUtils { - - - val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier" - - val DATE_RELATION_KEY:String = "RelationDate" - case class RelationVocabulary(original:String, inverse:String){} - - case class RelatedEntities(id:String, relatedDataset:Long, relatedPublication:Long){} - - val relations:Map[String, RelationVocabulary] = { - val input =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relations.json")).mkString - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - - lazy val json: json4s.JValue = parse(input) - - json.extract[Map[String, RelationVocabulary]] - } - - - def extractRelationDate(relation: Relation):String = { - - if (relation.getProperties== null || !relation.getProperties.isEmpty) - null - else { - val date =relation.getProperties.asScala.find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey)).map(p => p.getValue) - if (date.isDefined) - date.get - else - null - } - } - - def extractRelationDate(summary: ScholixSummary):String = { - - if(summary.getDate== null || summary.getDate.isEmpty) - null - else { - summary.getDate.get(0) - } - - - } - - def inverseRelationShip(rel:ScholixRelationship):ScholixRelationship = { - new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName) - - - } - - - - val statsAggregator:Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] = new Aggregator[(String,String, Long), RelatedEntities, RelatedEntities] with Serializable { - override def zero: RelatedEntities = null - - override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = { - val id = a._1 - val relatedDataset = if ("dataset".equalsIgnoreCase(a._2)) 
a._3 else 0 - val relatedPublication = if ("publication".equalsIgnoreCase(a._2)) a._3 else 0 - - if (b == null) - RelatedEntities(a._1, relatedDataset, relatedPublication) - else - RelatedEntities(a._1,b.relatedDataset+ relatedDataset, b.relatedPublication+ relatedPublication ) - } - - override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = { - if (b1!= null && b2!= null) - RelatedEntities(b1.id, b1.relatedDataset+ b2.relatedDataset, b1.relatedPublication+ b2.relatedPublication) - - else - if (b1!= null) - b1 - else - b2 - } - - override def finish(reduction: RelatedEntities): RelatedEntities = reduction - - override def bufferEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities]) - - override def outputEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities]) - } - - - val scholixAggregator: Aggregator[(String, Scholix), Scholix, Scholix] = new Aggregator[(String, Scholix), Scholix, Scholix] with Serializable { - override def zero: Scholix = null - - - def scholix_complete(s:Scholix):Boolean ={ - if (s== null || s.getIdentifier==null) { - false - } else if (s.getSource == null || s.getTarget == null) { - false - } - else if (s.getLinkprovider == null || s.getLinkprovider.isEmpty) - false - else - true - } - - override def reduce(b: Scholix, a: (String, Scholix)): Scholix = { - if (scholix_complete(b)) b else a._2 - } - - override def merge(b1: Scholix, b2: Scholix): Scholix = { - if (scholix_complete(b1)) b1 else b2 - } - - override def finish(reduction: Scholix): Scholix = reduction - - override def bufferEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] - - override def outputEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] - } - - - def createInverseScholixRelation(scholix: Scholix):Scholix = { - val s = new Scholix - s.setPublicationDate(scholix.getPublicationDate) - s.setPublisher(scholix.getPublisher) - s.setLinkprovider(scholix.getLinkprovider) - 
s.setRelationship(inverseRelationShip(scholix.getRelationship)) - s.setSource(scholix.getTarget) - s.setTarget(scholix.getSource) - s.setIdentifier(DHPUtils.md5(s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}")) - s - - - - } - - - def extractCollectedFrom(summary:ScholixSummary): List[ScholixEntityId] = { - if (summary.getDatasources!= null && !summary.getDatasources.isEmpty) { - val l: List[ScholixEntityId] = summary.getDatasources.asScala.map{ - d => new ScholixEntityId(d.getDatasourceName, List(new ScholixIdentifier(d.getDatasourceId, "DNET Identifier", null)).asJava) - }(collection.breakOut) - l - } else List() - } - - def extractCollectedFrom(relation: Relation) : List[ScholixEntityId] = { - if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) { - - - val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { - c => - - new ScholixEntityId(c.getValue, List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA,null)).asJava) - }(collection breakOut) - l - } else List() - } - - - def generateCompleteScholix(scholix: Scholix, target:ScholixSummary): Scholix = { - val s = new Scholix - s.setPublicationDate(scholix.getPublicationDate) - s.setPublisher(scholix.getPublisher) - s.setLinkprovider(scholix.getLinkprovider) - s.setRelationship(scholix.getRelationship) - s.setSource(scholix.getSource) - s.setTarget(generateScholixResourceFromSummary(target)) - s.setIdentifier(DHPUtils.md5(s"${s.getSource.getIdentifier}::${s.getRelationship.getName}::${s.getTarget.getIdentifier}")) - s - } - - - def generateScholixResourceFromSummary(summaryObject: ScholixSummary): ScholixResource = { - val r = new ScholixResource - r.setIdentifier(summaryObject.getLocalIdentifier) - r.setDnetIdentifier(summaryObject.getId) - - r.setObjectType(summaryObject.getTypology.toString) - r.setObjectSubType(summaryObject.getSubType) - - if (summaryObject.getTitle!= null && !summaryObject.getTitle.isEmpty) - 
r.setTitle(summaryObject.getTitle.get(0)) - - if (summaryObject.getAuthor!= null && !summaryObject.getAuthor.isEmpty){ - val l:List[ScholixEntityId] = summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a,null)).toList - if (l.nonEmpty) - r.setCreator(l.asJava) - } - - if (summaryObject.getDate!= null && !summaryObject.getDate.isEmpty) - r.setPublicationDate(summaryObject.getDate.get(0)) - if (summaryObject.getPublisher!= null && !summaryObject.getPublisher.isEmpty) - { - val plist:List[ScholixEntityId] =summaryObject.getPublisher.asScala.map(p => new ScholixEntityId(p, null)).toList - - if (plist.nonEmpty) - r.setPublisher(plist.asJava) - } - - - if (summaryObject.getDatasources!= null && !summaryObject.getDatasources.isEmpty) { - - val l:List[ScholixCollectedFrom] = summaryObject.getDatasources.asScala.map(c => new ScholixCollectedFrom( - new ScholixEntityId(c.getDatasourceName, List(new ScholixIdentifier(c.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava) - , "collected", "complete" - - )).toList - - if (l.nonEmpty) - r.setCollectedFrom(l.asJava) - - } - r - } - - - - - - def scholixFromSource(relation:Relation, source:ScholixSummary):Scholix = { - - if (relation== null || source== null) - return null - - val s = new Scholix - - var l: List[ScholixEntityId] = extractCollectedFrom(relation) - if (l.isEmpty) - l = extractCollectedFrom(source) - if (l.isEmpty) - return null - - s.setLinkprovider(l.asJava) - - var d = extractRelationDate(relation) - if (d == null) - d = extractRelationDate(source) - - s.setPublicationDate(d) - - - if (source.getPublisher!= null && !source.getPublisher.isEmpty) { - val l: List[ScholixEntityId] = source.getPublisher.asScala - .map{ - p => - new ScholixEntityId(p, null) - }(collection.breakOut) - - if (l.nonEmpty) - s.setPublisher(l.asJava) - } - - val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null) - if (semanticRelation== null) - return null - s.setRelationship(new 
ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse)) - s.setSource(generateScholixResourceFromSummary(source)) - - s - } - - - def findURLForPID(pidValue:List[StructuredProperty], urls:List[String]):List[(StructuredProperty, String)] = { - pidValue.map{ - p => - val pv = p.getValue - - val r = urls.find(u => u.toLowerCase.contains(pv.toLowerCase)) - (p, r.orNull) - } - } - - - def extractTypedIdentifierFromInstance(r:Result):List[ScholixIdentifier] = { - if (r.getInstance() == null || r.getInstance().isEmpty) - return List() - r.getInstance().asScala.filter(i => i.getUrl!= null && !i.getUrl.isEmpty) - .filter(i => i.getPid!= null && i.getUrl != null) - .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) - .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)).distinct.toList - } - - def resultToSummary(r:Result):ScholixSummary = { - val s = new ScholixSummary - s.setId(r.getId) - if (r.getPid == null || r.getPid.isEmpty) - return null - - val persistentIdentifiers:List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) - if (persistentIdentifiers.isEmpty) - return null - s.setLocalIdentifier(persistentIdentifiers.asJava) - if (r.isInstanceOf[Publication] ) - s.setTypology(Typology.publication) - else - s.setTypology(Typology.dataset) - - s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) - - if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) { - val titles:List[String] =r.getTitle.asScala.map(t => t.getValue)(collection breakOut) - if (titles.nonEmpty) - s.setTitle(titles.asJava) - else - return null - } - - if(r.getAuthor!= null && !r.getAuthor.isEmpty) { - val authors:List[String] = r.getAuthor.asScala.map(a=> a.getFullname)(collection breakOut) - if (authors nonEmpty) - s.setAuthor(authors.asJava) - } - if (r.getInstance() != null) { - val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => 
i.getDateofacceptance.getValue)(collection.breakOut) - if (dt.nonEmpty) - s.setDate(dt.distinct.asJava) - } - if (r.getDescription!= null && !r.getDescription.isEmpty) { - val d = r.getDescription.asScala.find(f => f!= null && f.getValue!=null) - if (d.isDefined) - s.setDescription(d.get.getValue) - } - - if (r.getSubject!= null && !r.getSubject.isEmpty) { - val subjects:List[SchemeValue] =r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue))(collection breakOut) - if (subjects.nonEmpty) - s.setSubject(subjects.asJava) - } - - if (r.getPublisher!= null) - s.setPublisher(List(r.getPublisher.getValue).asJava) - - if (r.getCollectedfrom!= null && !r.getCollectedfrom.isEmpty) { - val cf:List[CollectedFromType] = r.getCollectedfrom.asScala.map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))(collection breakOut) - if (cf.nonEmpty) - s.setDatasources(cf.distinct.asJava) - } - - s.setRelatedDatasets(0) - s.setRelatedPublications(0) - s.setRelatedUnknown(0) - - s - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml deleted file mode 100644 index fcef2547a..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml +++ /dev/null @@ -1,431 +0,0 @@ - - - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - one among {new, update, version} - - - conceptRecordId - for new version, the id of the record for the old deposition - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc 
url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - - - - - - yarn - cluster - Dump table publication for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/publication - 
--communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table dataset for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dataset - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table ORP for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/otherresearchproduct - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table software for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - 
--driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/software - --communityMapPath${workingDir}/communityMap - - - - - - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - yarn - cluster - Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/publication - 
--outputPath${workingDir}/ext/publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dataset - --outputPath${workingDir}/ext/dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/otherresearchproduct - --outputPath${workingDir}/ext/orp - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped software with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} 
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/software - --outputPath${workingDir}/ext/software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - - yarn - cluster - Split dumped result for community - eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext - --outputPath${workingDir}/split - --communityMapPath${workingDir}/communityMap - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/split - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --communityMapPath${workingDir}/communityMap - --conceptRecordId${conceptRecordId} - --depositionId${depositionId} - --depositionType${depositionType} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0ae..000000000 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml deleted file mode 100644 index 8189e2594..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml +++ /dev/null @@ -1,586 +0,0 @@ - - - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - resultAggregation - true if all the result type have to be dumped under result. false otherwise - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. 
"new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - organizationCommunityMap - the organization community map - - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - - - - - - - - - - yarn - cluster - Dump table publication - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - 
--executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/result/publication - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/result/dataset - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf 
spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/result/otherresearchproduct - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/result/software - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table organization - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/organization - --resultTableNameeu.dnetlib.dhp.schema.oaf.Organization - --outputPath${workingDir}/collect/organization - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table project - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - 
dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/project - --resultTableNameeu.dnetlib.dhp.schema.oaf.Project - --outputPath${workingDir}/collect/project - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table datasource - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/datasource - --resultTableNameeu.dnetlib.dhp.schema.oaf.Datasource - --outputPath${workingDir}/collect/datasource - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/relation/relation - - - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities - --hdfsPath${workingDir}/collect/communities_infrastructures/communities_infrastructure.json.gz - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation - --hdfsPath${workingDir}/relation/context - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/relation/contextOrg - --organizationCommunityMap${organizationCommunityMap} - --communityMapPath${workingDir}/communityMap - - - - - - - - - - - - - - - - - yarn - cluster - Extract Relations from publication - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/relation/publication - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/relation/dataset - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/relation/orp - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table software - 
eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/relation/software - --communityMapPath${workingDir}/communityMap - - - - - - - - - - yarn - cluster - Collect Results and Relations and put them in the right path - eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir} - --outputPath${workingDir}/collect - --resultAggregation${resultAggregation} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/collect - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --communityMapPath${workingDir}/communityMap - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} 
- - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json deleted file mode 100644 index d2f179212..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "Description of the research community/research infrastructure" - }, - "id": { - "type": "string", - "description": "OpenAIRE id of the research community/research infrastructure" - }, - "name": { - "type": "string", - "description": "The long name of the community" - }, - "originalId": { - "type": "string", - "description": "The acronym of the community" - }, - "subject": { - "description": "Only for research communities: the list of the subjects associated to the research community", - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "description": "One of {Research Community, Research infrastructure}" - }, - "zenodo_community": { - "type": "string", - "description": "The URL of the Zenodo community associated to the Research community/Research infrastructure" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json deleted file mode 100644 index b9c15d921..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json +++ /dev/null @@ 
-1,192 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - } - }, - "type": "object", - "properties": { - "accessrights": { - "type": "string", - "description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}" - }, - "certificates": { - "type": "string", - "description": "The certificate, seal or standard the data source complies with. As defined by re3data.org." - }, - "citationguidelineurl": { - "type": "string", - "description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org." - }, - "contenttypes": { - "description": "Types of content in the data source, as defined by OpenDOAR", - "type": "array", - "items": { - "type": "string" - } - }, - "databaseaccessrestriction": { - "type": "string", - "description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}" - }, - "datasourcetype": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - }, - { - "description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies" - } - ] - }, - "datauploadrestriction": { - "type": "string", - "description": "Upload restrictions applied by the datasource, as defined by re3data.org. 
One of {feeRequired, registration, other}" - }, - "dateofvalidation": { - "type": "string", - "description": "The date of last validation against the OpenAIRE guidelines for the datasource records" - }, - "description": { - "type": "string" - }, - "englishname": { - "type": "string", - "description": "The English name of the datasource" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id of the data source" - }, - "journal": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Issue number" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string" - }, - "sp": { - "type": "string", - "description": "Start page" - }, - "vol": { - "type": "string", - "description": "Volume" - } - }, - "description": "Information about the journal, if this data source is of type Journal." - }, - "languages": { - "description": "The languages present in the data source's content, as defined by OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "logourl": { - "type": "string" - }, - "missionstatementurl": { - "type": "string", - "description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org" - }, - "officialname": { - "type": "string", - "description": "The official name of the datasource" - }, - "openairecompatibility": { - "type": "string", - "description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu." 
- }, - "originalId": { - "description": "Original identifiers for the datasource" - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the datasource", - "type": "array", - "items": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - } - ] - } - }, - "pidsystems": { - "type": "string", - "description": "The persistent identifier system that is used by the data source. As defined by re3data.org" - }, - "policies": { - "description": "Policies of the data source, as defined in OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "releaseenddate": { - "type": "string", - "description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org" - }, - "releasestartdate": { - "type": "string", - "description": "Releasing date of the data source, as defined by re3data.org" - }, - "subjects": { - "description": "List of subjects associated to the datasource", - "type": "array", - "items": { - "type": "string" - } - }, - "uploadrights": { - "type": "string", - "description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}" - }, - "versioning": { - "type": "boolean", - "description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise." 
- }, - "websiteurl": { - "type": "string" - } - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json deleted file mode 100644 index 16afa386d..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "alternativenames": { - "description": "Alternative names that identify the organisation", - "type": "array", - "items": { - "type": "string" - } - }, - "country": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The organisation country code" - }, - "label": { - "type": "string", - "description": "The organisation country label" - } - }, - "description": "The country of the organisation" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id for the organisation" - }, - "legalname": { - "type": "string" - }, - "legalshortname": { - "type": "string" - }, - "pid": { - "description": "Persistent identifiers for the organisation i.e. isni 0000000090326370", - "type": "array", - "items": { - "type": "object", - "properties": { - "scheme": { - "type": "string", - "description": "The scheme of the identifier (i.e. isni)" - }, - "value": { - "type": "string", - "description": "the value in the schema (i.e. 
0000000090326370)" - } - } - } - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json deleted file mode 100644 index c81187258..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "acronym": { - "type": "string" - }, - "callidentifier": { - "type": "string" - }, - "code": { - "type": "string", - "description": "The grant agreement number" - }, - "enddate": { - "type": "string" - }, - "funding": { - "description": "Funding information for the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "funding_stream": { - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "Description of the funding stream" - }, - "id": { - "type": "string", - "description": "Id of the funding stream" - } - } - }, - "jurisdiction": { - "type": "string", - "description": "The jurisdiction of the funder (i.e. EU)" - }, - "name": { - "type": "string", - "description": "The name of the funder (European Commission)" - }, - "shortName": { - "type": "string", - "description": "The short name of the funder (EC)" - } - } - } - }, - "granted": { - "type": "object", - "properties": { - "currency": { - "type": "string", - "description": "The currency of the granted amount (e.g. 
EUR)" - }, - "fundedamount": { - "type": "number", - "description": "The funded amount" - }, - "totalcost": { - "type": "number", - "description": "The total cost of the project" - } - }, - "description": "The money granted to the project" - }, - "h2020programme": { - "description": "The h2020 programme funding the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The code of the programme" - }, - "description": { - "type": "string", - "description": "The description of the programme" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE id for the project" - }, - "keywords": { - "type": "string" - }, - "openaccessmandatefordataset": { - "type": "boolean" - }, - "openaccessmandateforpublications": { - "type": "boolean" - }, - "startdate": { - "type": "string" - }, - "subject": { - "type": "array", - "items": { - "type": "string" - } - }, - "summary": { - "type": "string" - }, - "title": { - "type": "string" - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json deleted file mode 100644 index 7c7de9c98..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "Node": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The OpenAIRE id of the entity" - }, - "type": { - "type": "string", - "description": "The type of the entity (i.e. 
organisation)" - } - } - } - }, - "type": "object", - "properties": { - "provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The reason why OpenAIRE holds the relation " - }, - "trust": { - "type": "string", - "description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)" - } - } - }, - "reltype": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The semantics of the relation (i.e. isAuthorInstitutionOf). " - }, - "type": { - "type": "string", - "description": "the type of the relation (i.e. affiliation)" - } - }, - "description": "To represent the semantics of a relation between two entities" - }, - "source": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node source in the relation"} - ] - }, - "target": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node target in the relation"} - ] - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json deleted file mode 100644 index 03cbfb074..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json +++ /dev/null @@ -1,398 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "AccessRight":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - 
"label": { - "type": "string", - "description": "Label for the access mode" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - }, - "Provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The process that produced/provided the information" - }, - "trust": { - "type": "string" - } - }, - "description": "Indicates the process that produced (or provided) the information, and the trust associated to the information" - } - }, - "type": "object", - "properties": { - "author": { - "type": "array", - "items": { - "type": "object", - "properties": { - "fullname": { - "type": "string" - }, - "name": { - "type": "string" - }, - "pid": { - "type": "object", - "properties": { - "id": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"} - ] - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Provenance of author's pid"} - ] - } - } - }, - "rank": { - "type": "integer" - }, - "surname": { - "type": "string" - } - } - } - }, - "bestaccessright": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. 
Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - }, - "description": "The openest access right associated to the manifestations of this research results" - }, - "codeRepositoryUrl": { - "type": "string", - "description": "Only for results with type 'software': the URL to the repository with the source code" - }, - "contactgroup": { - "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "contactperson": { - "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "container": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string", - "description": "Edition of the journal or conference proceeding" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Journal issue" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string", - "description": "Name of the journal or conference" - }, - "sp": { - "type": "string", - "description": "start page" - }, - "vol": { - "type": "string" - } - }, - "description": "Container has information about the conference or journal where the result has been presented or published" - }, - "contributor": { - "type": "array", - "items": { - "type": "string", - "description": "Description of contributor" - } - }, - "country": { - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "ISO 3166-1 alpha-2 country code" - }, - 
"label": { - "type": "string" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this result is associated to the country."} - ] - } - } - } - }, - "coverage": { - "type": "array", - "items": { - "type": "string" - } - }, - "dateofcollection": { - "type": "string", - "description": "When OpenAIRE collected the record the last time" - }, - "description": { - "type": "array", - "items": { - "type": "string" - } - }, - "documentationUrl": { - "description": "Only for results with type 'software': URL to the software documentation", - "type": "array", - "items": { - "type": "string" - } - }, - "embargoenddate": { - "type": "string", - "description": "Date when the embargo ends and this result turns Open Access" - }, - "format": { - "type": "array", - "items": { - "type": "string" - } - }, - "geolocation": { - "description": "Geolocation information", - "type": "array", - "items": { - "type": "object", - "properties": { - "box": { - "type": "string" - }, - "place": { - "type": "string" - }, - "point": { - "type": "string" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE Identifier" - }, - "language": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "alpha-3/ISO 639-2 code of the language" - }, - "label": { - "type": "string", - "description": "English label" - } - } - }, - "lastupdatetimestamp": { - "type": "integer", - "description": "Timestamp of last update of the record in OpenAIRE" - }, - "maintitle": { - "type": "string" - }, - "originalId": { - "description": "Identifiers of the record at the original sources", - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the result", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID 
of the result "} - ] - } - }, - "instance":{ - "type":"array", - "items":{ - "type":"object", - "properties":{ - "accessright":{ - "allOf":[ - { - "$ref":"#/definitions/AccessRight" - }, - { - "description":"The accessright of this materialization of the result" - } - ] - }, - "articleprocessingcharge":{ - "type":"object", - "properties":{ - "amount":{ - "type":"string" - }, - "currency":{ - "type":"string" - } - } - }, - "license":{ - "type":"string" - }, - "publicationdate":{ - "type":"string" - }, - "refereed":{ - "type":"string" - }, - "type":{ - "type":"string", - "description":"The specific sub-type of this materialization of the result (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" - }, - "url":{ - "description":"Description of url", - "type":"array", - "items":{ - "type":"string", - "description":"urls where it is possible to access the materialization of the result" - } - } - }, - "description":"One of the materialization for this result" - } - }, - "programmingLanguage": { - "type": "string", - "description": "Only for results with type 'software': the programming language" - }, - "publicationdate": { - "type": "string" - }, - "publisher": { - "type": "string" - }, - "size": { - "type": "string", - "description": "Only for results with type 'dataset': the declared size of the dataset" - }, - "source": { - "description": "See definition of Dublin Core field dc:source", - "type": "array", - "items": { - "type": "string" - } - }, - "subjects": { - "description": "Keywords associated to the result", - "type": "array", - "items": { - "type": "object", - "properties": { - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this subject is associated to the result"} - ] - }, - "subject": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and 
value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} - ] - } - } - } - }, - "subtitle": { - "type": "string" - }, - "tool": { - "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" - }, - "version": { - "type": "string", - "description": "Version of the result" - } - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0ae..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml deleted file mode 100644 index 650b972fa..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml +++ /dev/null @@ -1,563 +0,0 @@ - - - - - upload - false - true 
to upload the dump for the funders in Zenodo - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - 
- eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/result/publication - --relationPath${sourcePath}/relation - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/result/dataset - --relationPath${sourcePath}/relation - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - 
--executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/result/otherresearchproduct - --relationPath${sourcePath}/relation - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/result/software - --relationPath${sourcePath}/relation - - - - - - - - - - - - - - - - - yarn - cluster - Dump table publication for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/dump/publication - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - yarn - cluster - Dump table dataset for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dump/dataset - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - yarn - cluster - Dump table ORP for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/dump/otherresearchproduct - 
--communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - yarn - cluster - Dump table software for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/dump/software - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - yarn - cluster - Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf 
spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/publication - --outputPath${workingDir}/ext/publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/dataset - --outputPath${workingDir}/ext/dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/otherresearchproduct - --outputPath${workingDir}/ext/orp - 
--preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped software with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/software - --outputPath${workingDir}/ext/software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext - --outputPath${workingDir}/resultperfunder - --relationPath${sourcePath} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/resultperfunder - - - - - - - - ${wf:conf('upload') eq true} - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --communityMapPath${workingDir}/communityMap - 
--conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_collect_and_save.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_collect_and_save.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_parameters.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_organization_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json similarity index 100% rename from 
dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_organization_parameters.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json similarity index 73% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json index 2bfcac3bc..5c26ea7d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json @@ -19,6 +19,12 @@ "paramLongName": "isSparkSessionManaged", "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false + }, + { + "paramName": "rs", + "paramLongName": "removeSet", + "paramDescription": "the list of classname relations, split by ';', not to be dumped", + "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json deleted file mode 100644 index cb092110e..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json +++ /dev/null @@ -1,542 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "AccessRight": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "COAR access 
mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string", - "description": "The scheme for the resource" - }, - "value": { - "type": "string", - "description": "the value in the scheme" - } - } - }, - "KeyValue": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "Description of key" - }, - "value": { - "type": "string", - "description": "Description of value" - } - } - }, - "Provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The provenance of the information" - }, - "trust": { - "type": "string", - "description": "The trust associated to the information" - } - } - } - }, - "type": "object", - "properties": { - "author": { - "description": "List of authors of the research results", - "type": "array", - "items": { - "type": "object", - "properties": { - "affiliation": { - "description": "Affiliations of the author", - "type": "array", - "items": { - "type": "string", - "description": "One of the affiliation of the author" - } - }, - "fullname": { - "type": "string", - "description": "Fullname of the author" - }, - "name": { - "type": "string", - "description": "First name of the author" - }, - "pid": { - "type": "object", - "properties": { - "id": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "The author's id and scheme. 
OpenAIRE currently supports 'ORCID'"} - ] - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "The provenance of the author's pid"} - ] - } - }, - "description": "Persistent identifier of the author (e.g. ORCID)" - }, - "rank": { - "type": "integer", - "description": "Order in which the author appears in the authors list" - }, - "surname": { - "type": "string", - "description": "Surname of the author" - } - }, - "description": "One of the author of the research result" - } - }, - "bestaccessright": { - "allOf": [ - {"$ref": "#/definitions/AccessRight"}, - {"description": "The openest access right associated to the manifestations of this research results"} - ] - }, - "codeRepositoryUrl": { - "type": "string", - "description": "Only for results with type 'software': the URL to the repository with the source code" - }, - "collectedfrom": { - "description": "Information about the sources from which the record has been collected", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/KeyValue"}, - {"description": "Key is the OpenAIRE identifier of the data source, value is its name"} - ] - } - }, - "contactgroup": { - "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "contactperson": { - "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "container": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string", - "description": "Date of the conference" - }, - "conferenceplace": { - "type": "string", - "description": "Place of the conference" - }, - "edition": { - "type": "string", - "description": "Edition of the journal or conference proceeding" - }, - "ep": { - "type": 
"string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Journal issue" - }, - "issnLinking": { - "type": "string", - "description": "Journal linking iisn" - }, - "issnOnline": { - "type": "string", - "description": "Journal online issn" - }, - "issnPrinted": { - "type": "string", - "description": "Journal printed issn" - }, - "name": { - "type": "string", - "description": "Name of the journal or conference" - }, - "sp": { - "type": "string", - "description": "Start page" - }, - "vol": { - "type": "string", - "description": "Volume" - } - }, - "description": "Container has information about the conference or journal where the result has been presented or published" - }, - "context": { - "description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu", - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "Code identifying the RI/RC" - }, - "label": { - "type": "string", - "description": "Label of the RI/RC" - }, - "provenance": { - "description": "Why this result is associated to the RI/RC.", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/Provenance"} - - ] - } - } - } - } - }, - "contributor": { - "description": "Contributors of this result", - "type": "array", - "items": { - "type": "string" - } - }, - "country": { - "description": "Country associated to this result", - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "ISO 3166-1 alpha-2 country code" - }, - "label": { - "type": "string", - "description": "English label of the country" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this result is associated to the country."} - ] - } - } - } - }, - "coverage": { - "type": "array", - "items": { - "type": "string" - 
} - }, - "dateofcollection": { - "type": "string", - "description": "When OpenAIRE collected the record the last time" - }, - "description": { - "type": "array", - "items": { - "type": "string" - } - }, - "documentationUrl": { - "description": "Only for results with type 'software': URL to the software documentation", - "type": "array", - "items": { - "type": "string" - - } - }, - "embargoenddate": { - "type": "string", - "description": "Date when the embargo ends and this result turns Open Access" - }, - "externalReference": { - "description": "Links to external resources like entries from thematic databases (e.g. Protein Data Bank)", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this result is linked to the external resource"} - ] - }, - "typology": { - "type": "string" - }, - "value": { - "type": "string" - } - } - } - }, - "format": { - - "type": "array", - "items": { - "type": "string" - } - }, - "geolocation": { - "description": "Geolocation information", - "type": "array", - "items": { - "type": "object", - "properties": { - "box": { - "type": "string" - }, - "place": { - "type": "string" - }, - "point": { - "type": "string" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE identifier" - }, - "instance": { - "description": "Manifestations (i.e. different versions) of the result. For example: the pre-print and the published versions are two manifestations of the same research result", - "type": "array", - "items": { - "type": "object", - "properties": { - "accessright": { - "allOf": [ - {"$ref": "#/definitions/AccessRight"}, - {"description": "Access right of this instance"} - ] - }, - "collectedfrom": { - "allOf": [ - {"$ref": "#/definitions/KeyValue"}, - {"description": "Information about the source from which the instance has been collected. 
Key is the OpenAIRE identifier of the data source, value is its name"} - ] - }, - "hostedby": { - "allOf": [ - {"$ref": "#/definitions/KeyValue"}, - {"description": "Information about the source from which the instance can be viewed or downloaded. Key is the OpenAIRE identifier of the data source, value is its name"} - ] - }, - "license": { - "type": "string", - "description": "License applied to the instance" - }, - "publicationdate": { - "type": "string", - "description": "Publication date of the instance" - }, - "refereed": { - "type": "string", - "description": "Was the instance subject to peer-review? Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed' (see also https://api.openaire.eu/vocabularies/dnet:review_levels)" - }, - "type": { - "type": "string", - "description": "Type of the instance. Possible values are listed at https://api.openaire.eu/vocabularies/dnet:publication_resource" - }, - "url": { - "description":"Location where the instance is accessible", - "type": "array", - "items": { - "type": "string" - } - } - } - } - }, - "language": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "alpha-3/ISO 639-2 code of the language" - }, - "label": { - "type": "string", - "description": "English label" - } - } - }, - "lastupdatetimestamp": { - "type": "integer", - "description": "Timestamp of last update of the record in OpenAIRE" - }, - "maintitle": { - "type": "string", - "description": "Title" - }, - "originalId": { - "description": "Identifiers of the record at the original sources", - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the result", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result "} - ] - } - }, - "programmingLanguage": { - "type": "string", - 
"description": "Only for results with type 'software': the programming language" - }, - "projects": { - "description": "List of projects (i.e. grants) that (co-)funded the production ofn the research results", - "type": "array", - "items": { - "type": "object", - "properties": { - "acronym": { - "type": "string", - "description": "Project acronym" - }, - "code": { - "type": "string", - "description": "Grant code" - }, - "funder": { - "type": "object", - "properties": { - "fundingStream": { - "type": "string", - "description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)" - }, - "jurisdiction": { - "type": "string", - "description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)" - }, - "name": { - "type": "string", - "description": "Name of the funder" - }, - "shortName": { - "type": "string", - "description": "Short name or acronym of the funder" - } - }, - "description": "Information about the funder funding the project" - }, - "id": { - "type": "string", - "description": "OpenAIRE identifier of the project" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this project is associated to the result"} - ] - }, - "title": { - "type": "string", - "description": "Title of the project" - } - } - } - }, - "publicationdate": { - "type": "string", - "description": "Date of publication" - }, - "publisher": { - "type": "string", - "description": "Publisher" - }, - "size": { - "type": "string", - "description": "Only for results with type 'dataset': the declared size of the dataset" - }, - "source": { - "description": "See definition of Dublin Core field dc:source", - "type": "array", - "items": { - "type": "string" - } - }, - "subjects": { - "description": "Keywords associated to the result", - "type": "array", - "items": { - "type": "object", - "properties": { - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - 
{"description": "Why this subject is associated to the result"} - ] - }, - "subject": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary). "} - ] - } - } - } - }, - "subtitle": { - "type": "string", - "description": "Sub-title of the result" - }, - "tool": { - "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" - }, - "version": { - "type": "string", - "description": "Version of the result" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml index f84ab7e1a..8eab56992 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml @@ -325,6 +325,7 @@ --sourcePath${workingDir}/validrelation --outputPath${workingDir}/relation/relation + --removeSet${removeSet} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml new file mode 100644 index 
000000000..2e0ed9aee --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml @@ -0,0 +1,18 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml new file mode 100644 index 000000000..f77b46105 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml @@ -0,0 +1,298 @@ + + + + graphBasePath + the input graph base path + + + workingPath + path of the working directory + + + graphOutputPath + path of the output graph + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + group graph entities + eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob + 
dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=15000 + + --graphInputPath${graphBasePath} + --outputPath${workingPath}/grouped_entities + + + + + + + + + + + + + + + + + + yarn + cluster + Dispatch publications + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/datasource + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource + + + + + + + + yarn + cluster + Dispatch project + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/project + 
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project + + + + + + + + yarn + cluster + Dispatch organization + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/organization + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization + + + + + + + + yarn + cluster + Dispatch publication + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/publication + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + yarn + cluster + Dispatch dataset + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/dataset + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + yarn + cluster + Dispatch software + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/software + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + yarn + cluster + Dispatch otherresearchproduct + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/otherresearchproduct + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + + + ${nameNode}/${graphBasePath}/relation + ${nameNode}/${graphOutputPath}/relation + + + + + + + \ No newline at end of file diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index 46e0eb5e1..7eaec2e2c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -8,3 +8,12 @@ CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.software s union all select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.otherresearchproduct o; + +ANALYZE TABLE ${hiveDbName}.datasource COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.organization COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.project COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.publication COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.dataset COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.otherresearchproduct COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.software COMPUTE STATISTICS; +ANALYZE TABLE ${hiveDbName}.relation COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml index 09930336a..ba5f4f375 100644 
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml @@ -292,7 +292,7 @@ yarn cluster - Import table project + Import table relation eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob dhp-graph-mapper-${projectVersion}.jar diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_organizations/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_organizations/oozie_app/workflow.xml index fb6e02555..70ecef154 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_organizations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_organizations/oozie_app/workflow.xml @@ -43,13 +43,18 @@ beta the database schema according to the D-Net infrastructure (beta or production) + + openOrgsAction + openorgs_dedup + The action to be executed during the import for OpenOrgs + isLookupUrl the address of the lookUp service nsPrefixBlacklist - + foo a blacklist of nsprefixes (comma separeted) @@ -156,7 +161,7 @@ --postgresUser${postgresOpenOrgsUser} --postgresPassword${postgresOpenOrgsPassword} --isLookupUrl${isLookupUrl} - --actionopenorgs_dedup + --action${openOrgsAction} --dbschema${dbSchema} --nsPrefixBlacklist${nsPrefixBlacklist} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index ceb13c5e8..74e792f07 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -8,14 +8,32 @@ 
unresolvedPath the path of the unresolved Entities + + targetPath + the target path after resolution + + + shouldResolveEntities + true + allows to activate/deactivate the resolution process over the entities + - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + yarn @@ -36,11 +54,20 @@ --masteryarn --graphBasePath${graphBasePath} --workingPath${workingDir} + --targetPath${targetPath} - + + + + ${wf:conf('shouldResolveEntities') eq false} + ${wf:conf('shouldResolveEntities') eq true} + + + + yarn @@ -62,11 +89,91 @@ --graphBasePath${graphBasePath} --unresolvedPath${unresolvedPath} --workingPath${workingDir} + --targetPath${targetPath} - + - + + + + + + + + + ${nameNode}/${graphBasePath}/publication + ${nameNode}/${targetPath}/publication + + + + + + + + ${nameNode}/${graphBasePath}/dataset + ${nameNode}/${targetPath}/dataset + + + + + + + + ${nameNode}/${graphBasePath}/otherresearchproduct + ${nameNode}/${targetPath}/otherresearchproduct + + + + + + + + ${nameNode}/${graphBasePath}/software + ${nameNode}/${targetPath}/software + + + + + + + + + + + + + + + + ${nameNode}/${graphBasePath}/organization + ${nameNode}/${targetPath}/organization + + + + + + + + ${nameNode}/${graphBasePath}/project + ${nameNode}/${targetPath}/project + + + + + + + + ${nameNode}/${graphBasePath}/datasource + ${nameNode}/${targetPath}/datasource + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json index f38cc1291..67e315664 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json @@ -2,5 +2,6 @@ {"paramName":"mt", 
"paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true}, {"paramName":"u", "paramLongName":"unresolvedPath", "paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true} + {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target path", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json index 1fbe20648..66a035da5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json @@ -1,5 +1,6 @@ [ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true} + {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target path", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json index 4b15da623..890570a0b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json @@ -1,6 +1,7 @@ [ - {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, - {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true}, - {"paramName":"o", "paramLongName":"objectType", "paramDescription": "should be scholix or Summary", "paramRequired": true} + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"su", "paramLongName":"scholixUpdatePath", "paramDescription": "the scholix updated Path", "paramRequired": false}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true}, + {"paramName":"o", "paramLongName":"objectType", "paramDescription": "should be scholix or Summary", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml index 17996c82c..e46e59cc0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml @@ 
-79,7 +79,7 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=20000 + --conf spark.sql.shuffle.partitions=30000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} @@ -90,68 +90,6 @@ --relationPath${targetPath}/relation - - - - - - - - - - - - - - - - yarn - cluster - Serialize scholix to JSON - eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=6000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --sourcePath${targetPath}/provision/scholix/scholix - --targetPath${targetPath}/index/scholix_json - --objectTypescholix - - - - - - - - - yarn - cluster - Serialize summary to JSON - eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=6000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --sourcePath${targetPath}/provision/summaries_filtered - --targetPath${targetPath}/index/summaries_json - --objectTypesummary - diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/relations.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/relations.json deleted file mode 100644 index 98e8daa18..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/relations.json +++ /dev/null @@ -1,158 +0,0 @@ -{ - "cites":{ - "original":"Cites", - "inverse":"IsCitedBy" - }, - "compiles":{ - "original":"Compiles", - "inverse":"IsCompiledBy" - }, - "continues":{ - "original":"Continues", - "inverse":"IsContinuedBy" - }, - "derives":{ - "original":"IsSourceOf", - "inverse":"IsDerivedFrom" - }, - "describes":{ - "original":"Describes", - "inverse":"IsDescribedBy" - }, - "documents":{ - "original":"Documents", - "inverse":"IsDocumentedBy" - }, - "hasmetadata":{ - "original":"HasMetadata", - "inverse":"IsMetadataOf" - }, - "hasassociationwith":{ - "original":"HasAssociationWith", - "inverse":"HasAssociationWith" - }, - "haspart":{ - "original":"HasPart", - "inverse":"IsPartOf" - }, - "hasversion":{ - "original":"HasVersion", - "inverse":"IsVersionOf" - }, - "iscitedby":{ - "original":"IsCitedBy", - "inverse":"Cites" - }, - "iscompiledby":{ - "original":"IsCompiledBy", - "inverse":"Compiles" - }, - "iscontinuedby":{ - "original":"IsContinuedBy", - "inverse":"Continues" - }, - "isderivedfrom":{ - "original":"IsDerivedFrom", - "inverse":"IsSourceOf" - }, - "isdescribedby":{ - "original":"IsDescribedBy", - "inverse":"Describes" - }, - "isdocumentedby":{ - "original":"IsDocumentedBy", - "inverse":"Documents" - }, - "isidenticalto":{ - "original":"IsIdenticalTo", - "inverse":"IsIdenticalTo" - }, - "ismetadatafor":{ - "original":"IsMetadataFor", - "inverse":"IsMetadataOf" - }, - "ismetadataof":{ - "original":"IsMetadataOf", - "inverse":"IsMetadataFor" - }, - "isnewversionof":{ - "original":"IsNewVersionOf", - "inverse":"IsPreviousVersionOf" - }, - "isobsoletedby":{ - "original":"IsObsoletedBy", - "inverse":"Obsoletes" - }, - 
"isoriginalformof":{ - "original":"IsOriginalFormOf", - "inverse":"IsVariantFormOf" - }, - "ispartof":{ - "original":"IsPartOf", - "inverse":"HasPart" - }, - "ispreviousversionof":{ - "original":"IsPreviousVersionOf", - "inverse":"IsNewVersionOf" - }, - "isreferencedby":{ - "original":"IsReferencedBy", - "inverse":"References" - }, - "isrelatedto":{ - "original":"IsRelatedTo", - "inverse":"IsRelatedTo" - }, - "isrequiredby":{ - "original":"IsRequiredBy", - "inverse":"Requires" - }, - "isreviewedby":{ - "original":"IsReviewedBy", - "inverse":"Reviews" - }, - "issourceof":{ - "original":"IsSourceOf", - "inverse":"IsDerivedFrom" - }, - "issupplementedby":{ - "original":"IsSupplementedBy", - "inverse":"IsSupplementTo" - }, - "issupplementto":{ - "original":"IsSupplementTo", - "inverse":"IsSupplementedBy" - }, - "isvariantformof":{ - "original":"IsVariantFormOf", - "inverse":"IsOriginalFormOf" - }, - "isversionof":{ - "original":"IsVersionOf", - "inverse":"HasVersion" - }, - "obsoletes":{ - "original":"Obsoletes", - "inverse":"IsObsoletedBy" - }, - "references":{ - "original":"References", - "inverse":"IsReferencedBy" - }, - "requires":{ - "original":"Requires", - "inverse":"IsRequiredBy" - }, - "related":{ - "original":"IsRelatedTo", - "inverse":"IsRelatedTo" - }, - "reviews":{ - "original":"Reviews", - "inverse":"IsReviewedBy" - }, - "unknown":{ - "original":"Unknown", - "inverse":"Unknown" - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/config-default.xml new file mode 100644 index 000000000..6fb2a1253 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/config-default.xml @@ -0,0 +1,10 @@ + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No 
newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/workflow.xml new file mode 100644 index 000000000..2844d7baa --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/serializeGraph/oozie_app/workflow.xml @@ -0,0 +1,83 @@ + + + + scholixUpdatePath + the working dir base path of the scholix updated + + + targetPath + the final graph path + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + yarn + cluster + Serialize scholix to JSON + eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=6000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --sourcePath${targetPath}/provision/scholix/scholix + --targetPath${targetPath}/index/scholix_json + --scholixUpdatePath${scholixUpdatePath} + --objectTypescholix + + + + + + + + + yarn + cluster + Serialize summary to JSON + eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=6000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --sourcePath${targetPath}/provision/summaries_filtered + --targetPath${targetPath}/index/summaries_json + --objectTypesummary + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala new file mode 100644 index 000000000..c5a2b4024 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -0,0 +1,195 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} + +case class HostedByItemType( + id: String, + officialname: String, + issn: String, + eissn: String, + lissn: String, + openAccess: Boolean +) {} + +case class HostedByInfo( + id: String, + officialname: String, + journal_id: String, + provenance: String, + id_type: String +) {} + +object Aggregators { + + def getId(s1: String, s2: String): String = { + if (s1.startsWith("10|")) { + return s1 + } + s2 + } + + def getValue(s1: String, s2: String): String = { + if (!s1.equals("")) { + return s1 + } + s2 + } + + def explodeHostedByItemType( + df: Dataset[(String, HostedByItemType)] + ): Dataset[(String, HostedByItemType)] = { + val transformedData: Dataset[(String, HostedByItemType)] = df + .groupByKey(_._1)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator) + .map { case (id: String, res: (String, HostedByItemType)) => + res + }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + transformedData + } + + val hostedByAggregator: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = + new Aggregator[ + (String, HostedByItemType), + (String, HostedByItemType), + (String, 
HostedByItemType) + ] { + + override def zero: (String, HostedByItemType) = + ("", HostedByItemType("", "", "", "", "", false)) + + override def reduce( + b: (String, HostedByItemType), + a: (String, HostedByItemType) + ): (String, HostedByItemType) = { + return merge(b, a) + } + + override def merge( + b1: (String, HostedByItemType), + b2: (String, HostedByItemType) + ): (String, HostedByItemType) = { + if (b1 == null) { + return b2 + } + if (b2 == null) { + return b1 + } + if (b1._2.id.startsWith("10|")) { + return ( + b1._1, + HostedByItemType( + b1._2.id, + b1._2.officialname, + b1._2.issn, + b1._2.eissn, + b1._2.lissn, + b1._2.openAccess || b2._2.openAccess + ) + ) + + } + return ( + b2._1, + HostedByItemType( + b2._2.id, + b2._2.officialname, + b2._2.issn, + b2._2.eissn, + b2._2.lissn, + b1._2.openAccess || b2._2.openAccess + ) + ) + + } + + override def finish(reduction: (String, HostedByItemType)): (String, HostedByItemType) = + reduction + + override def bufferEncoder: Encoder[(String, HostedByItemType)] = + Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + + override def outputEncoder: Encoder[(String, HostedByItemType)] = + Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + }.toColumn + + def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = + new Aggregator[EntityInfo, EntityInfo, EntityInfo] { + override def zero: EntityInfo = EntityInfo.newInstance("", "", "") + + override def reduce(b: EntityInfo, a: EntityInfo): EntityInfo = { + return merge(b, a) + } + + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null) { + return b2 + } + if (b2 == null) { + return b1 + } + if (!b1.getHostedById.equals("")) { + b1.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) + return b1 + } + b2.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) + b2 + + } + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = 
Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + def resultToSingleId(df: Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData: Dataset[EntityInfo] = df + .groupByKey(_.getId)(Encoders.STRING) + .agg(Aggregators.resultToSingleIdAggregator) + .map { case (id: String, res: EntityInfo) => + res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } + + def datasourceToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = + new Aggregator[EntityInfo, EntityInfo, EntityInfo] { + override def zero: EntityInfo = EntityInfo.newInstance("", "", "") + + override def reduce(b: EntityInfo, a: EntityInfo): EntityInfo = { + return merge(b, a) + } + + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null) { + return b2 + } + if (b2 == null) { + return b1 + } + if (!b1.getHostedById.equals("")) { + return b1 + } + b2 + + } + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + def datasourceToSingleId(df: Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData: Dataset[EntityInfo] = df + .groupByKey(_.getHostedById)(Encoders.STRING) + .agg(Aggregators.datasourceToSingleIdAggregator) + .map { case (id: String, res: EntityInfo) => + res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala similarity index 65% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala rename to 
dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 1b18ba3ae..80c672929 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -2,20 +2,20 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} object SparkApplyHostedByMapToDatasource { def applyHBtoDats(join: Dataset[EntityInfo], dats: Dataset[Datasource]): Dataset[Datasource] = { - dats.joinWith(join, dats.col("id").equalTo(join.col("hostedById")), "left") + dats + .joinWith(join, dats.col("id").equalTo(join.col("hostedById")), "left") .map(t2 => { val d: Datasource = t2._1 if (t2._2 != null) { @@ -32,14 +32,21 @@ object SparkApplyHostedByMapToDatasource { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json" 
+ ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") @@ -52,20 +59,27 @@ object SparkApplyHostedByMapToDatasource { val mapper = new ObjectMapper() - val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") + val dats: Dataset[Datasource] = spark.read + .textFile(graphPath + "/datasource") .map(r => mapper.readValue(r, classOf[Datasource])) - val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) - .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) + val pinfo: Dataset[EntityInfo] = Aggregators.datasourceToSingleId( + spark.read + .textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + ) - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + applyHBtoDats(pinfo, dats).write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath) - spark.read.textFile(outputPath) + spark.read + .textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression","gzip") + .option("compression", "gzip") .text(graphPath + "/datasource") } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala similarity index 62% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 0e047d016..a900fc241 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -5,31 +5,37 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication} +import eu.dnetlib.dhp.schema.oaf.{Instance, OpenAccessRoute, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} - import scala.collection.JavaConverters._ - object SparkApplyHostedByMapToResult { def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { - pubs.joinWith(join, pubs.col("id").equalTo(join.col("id")), "left") + pubs + .joinWith(join, pubs.col("id").equalTo(join.col("id")), "left") .map(t2 => { val p: Publication = t2._1 if (t2._2 != null) { val ei: EntityInfo = t2._2 val i = p.getInstance().asScala if (i.size == 1) { - val inst: Instance = i(0) + val inst: Instance = i.head inst.getHostedby.setKey(ei.getHostedById) inst.getHostedby.setValue(ei.getName) if (ei.getOpenAccess) { - inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) + inst.setAccessright( + OafMapperUtils.accessRight( + ModelConstants.ACCESS_RIGHT_OPEN, + "Open Access", + ModelConstants.DNET_ACCESS_MODES, + ModelConstants.DNET_ACCESS_MODES + ) + ) inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) 
p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); } @@ -39,48 +45,57 @@ object SparkApplyHostedByMapToResult { p })(Encoders.bean(classOf[Publication])) } - def main(args: Array[String]): Unit = { + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") val preparedInfoPath = parser.get("preparedInfoPath") - implicit val formats = DefaultFormats - implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val pubs : Dataset[Publication] = spark.read.textFile(graphPath + "/publication") + val pubs: Dataset[Publication] = spark.read + .textFile(graphPath + "/publication") .map(r => mapper.readValue(r, classOf[Publication])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) - .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + val pinfo: Dataset[EntityInfo] = spark.read + .textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + applyHBtoPubs(pinfo, pubs).write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + 
.json(outputPath) - spark.read.textFile(outputPath) + spark.read + .textFile(outputPath) .write .mode(SaveMode.Overwrite) - .option("compression","gzip") + .option("compression", "gzip") .text(graphPath + "/publication") } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala similarity index 57% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index b7a7d352f..34798b147 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -3,123 +3,125 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo - import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} - - object SparkPrepareHostedByInfoToApply { implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { - var lst:List[EntityInfo] = List() + def getList(id: String, j: Journal, name: String): List[EntityInfo] = { + var lst: 
List[EntityInfo] = List() - - if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){ + if (j.getIssnLinking != null && !j.getIssnLinking.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst } - if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){ + if (j.getIssnOnline != null && !j.getIssnOnline.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst } - if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){ + if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")) { lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst } lst } - def prepareResultInfo(spark:SparkSession, publicationPath:String) : Dataset[EntityInfo] = { + def prepareResultInfo(spark: SparkSession, publicationPath: String): Dataset[EntityInfo] = { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) val mapper = new ObjectMapper() - val dd : Dataset[Publication] = spark.read.textFile(publicationPath) + val dd: Dataset[Publication] = spark.read + .textFile(publicationPath) .map(r => mapper.readValue(r, classOf[Publication])) - dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, "")) + dd.filter(p => p.getJournal != null).flatMap(p => getList(p.getId, p.getJournal, "")) } - - def toEntityInfo(input:String): EntityInfo = { + def toEntityInfo(input: String): EntityInfo = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + val c: Map[String, HostedByItemType] = json.extract[Map[String, HostedByItemType]] toEntityItem(c.keys.head, c.values.head) } - - def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { + def toEntityItem(journal_id: String, hbi: HostedByItemType): EntityInfo = { EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) } def joinResHBM(res: 
Dataset[EntityInfo], hbm: Dataset[EntityInfo]): Dataset[EntityInfo] = { - Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") - .map(t2 => { - val res: EntityInfo = t2._1 - if(t2._2 != null ){ - val ds = t2._2 - res.setHostedById(ds.getId) - res.setOpenAccess(ds.getOpenAccess) - res.setName(ds.getName) - } - res - })) + Aggregators.resultToSingleId( + res + .joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") + .map(t2 => { + val res: EntityInfo = t2._1 + if (t2._2 != null) { + val ds = t2._2 + res.setHostedById(ds.getId) + res.setOpenAccess(ds.getOpenAccess) + res.setName(ds.getName) + } + res + }) + ) } def main(args: Array[String]): Unit = { - val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val graphPath = parser.get("graphPath") val outputPath = parser.get("preparedInfoPath") val hostedByMapPath = parser.get("hostedByMapPath") - implicit val formats = DefaultFormats - logger.info("Getting the Datasources") import spark.implicits._ - //STEP1: read the hostedbymap and transform it in EntityInfo - val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) + val hostedByInfo: Dataset[EntityInfo] = + spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP2: create association (publication, 
issn), (publication, eissn), (publication, lissn) - val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) + val resultInfoDataset: Dataset[EntityInfo] = + prepareResultInfo(spark, graphPath + "/publication") //STEP3: left join resultInfo with hostedByInfo on journal_id. Reduction of all the results with the same id in just //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map) //to this entry we add the id of the datasource for the next step - joinResHBM(resultInfoDataset, hostedByInfo) - .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) - + joinResHBM(resultInfoDataset, hostedByInfo).write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath) } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala new file mode 100644 index 000000000..8d8965866 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -0,0 +1,273 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} +import eu.dnetlib.dhp.schema.oaf.Datasource +import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.io.compress.GzipCodec +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} + 
+import java.io.PrintWriter + +object SparkProduceHostedByMap { + + implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = + Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)): HostedByItemType = { + val openaire: HostedByInfo = input._1._1 + val doaj: HostedByInfo = input._1._2 + val gold: HostedByInfo = input._2 + val isOpenAccess: Boolean = doaj == null && gold == null + + openaire.journal_id match { + case Constants.ISSN => + HostedByItemType( + openaire.id, + openaire.officialname, + openaire.journal_id, + "", + "", + isOpenAccess + ) + case Constants.EISSN => + HostedByItemType( + openaire.id, + openaire.officialname, + "", + openaire.journal_id, + "", + isOpenAccess + ) + case Constants.ISSNL => + HostedByItemType( + openaire.id, + openaire.officialname, + "", + "", + openaire.journal_id, + isOpenAccess + ) + + // catch the default with a variable so you can print it + case whoa => null + } + } + + def toHostedByMap(input: (String, HostedByItemType)): String = { + import org.json4s.jackson.Serialization + + implicit val formats = org.json4s.DefaultFormats + + val map: Map[String, HostedByItemType] = Map(input._1 -> input._2) + + Serialization.write(map) + + } + + def getHostedByItemType( + id: String, + officialname: String, + issn: String, + eissn: String, + issnl: String, + oa: Boolean + ): HostedByItemType = { + if (issn != null) { + if (eissn != null) { + if (issnl != null) { + HostedByItemType(id, officialname, issn, eissn, issnl, oa) + } else { + HostedByItemType(id, officialname, issn, eissn, "", oa) + } + } else { + if (issnl != null) { + HostedByItemType(id, officialname, issn, "", issnl, oa) + } else { + HostedByItemType(id, officialname, issn, "", "", oa) + } + } + } else { + if (eissn != null) { + if (issnl != null) { + HostedByItemType(id, officialname, "", eissn, issnl, oa) + } else { + HostedByItemType(id, officialname, "", eissn, 
"", oa) + } + } else { + if (issnl != null) { + HostedByItemType(id, officialname, "", "", issnl, oa) + } else { + HostedByItemType("", "", "", "", "", oa) + } + } + } + } + + def oaToHostedbyItemType(dats: Datasource): HostedByItemType = { + if (dats.getJournal != null) { + + return getHostedByItemType( + dats.getId, + dats.getOfficialname.getValue, + dats.getJournal.getIssnPrinted, + dats.getJournal.getIssnOnline, + dats.getJournal.getIssnLinking, + false + ) + } + HostedByItemType("", "", "", "", "", false) + } + + def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { + + import spark.implicits._ + + val mapper = new ObjectMapper() + + implicit var encoderD = Encoders.kryo[Datasource] + + val dd: Dataset[Datasource] = spark.read + .textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[Datasource])) + + dd.map { ddt => oaToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) + + } + + def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = { + return getHostedByItemType( + Constants.UNIBI, + gold.getTitle, + gold.getIssn, + "", + gold.getIssnL, + true + ) + } + + def goldHostedByDataset( + spark: SparkSession, + datasourcePath: String + ): Dataset[HostedByItemType] = { + import spark.implicits._ + + implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] + + val mapper = new ObjectMapper() + + val dd: Dataset[UnibiGoldModel] = spark.read + .textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) + + dd.map { ddt => goldToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) + + } + + def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = { + + return getHostedByItemType( + Constants.DOAJ, + doaj.getJournalTitle, + doaj.getIssn, + doaj.getEissn, + "", + true + ) + } + + def doajHostedByDataset( + spark: SparkSession, + datasourcePath: String + ): Dataset[HostedByItemType] = { + import spark.implicits._ + + implicit val 
mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] + + val mapper = new ObjectMapper() + + val dd: Dataset[DOAJModel] = spark.read + .textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[DOAJModel])) + + dd.map { ddt => doajToHostedbyItemType(ddt) }.filter(hb => !(hb.id.equals(""))) + + } + + def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { + var lst: List[(String, HostedByItemType)] = List() + if (!input.issn.equals("")) { + lst = (input.issn, input) :: lst + } + if (!input.eissn.equals("")) { + lst = (input.eissn, input) :: lst + } + if (!input.lissn.equals("")) { + lst = (input.lissn, input) :: lst + } + lst + } + + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode: String): Unit = { + val conf = new Configuration() + + conf.set("fs.defaultFS", hdfsNameNode) + val fs = FileSystem.get(conf) + val output = fs.create(new Path(outputPath)) + val writer = new PrintWriter(output) + try { + input.foreach(hbi => writer.println(hbi)) + } finally { + writer.close() + + } + + } + + def main(args: Array[String]): Unit = { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json") + ) + ) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")) + .getOrCreate() + + val datasourcePath = parser.get("datasourcePath") + val workingDirPath = parser.get("workingPath") + val outputPath = parser.get("outputPath") + + implicit val formats = DefaultFormats + + logger.info("Getting the Datasources") + + Aggregators + .explodeHostedByItemType( + oaHostedByDataset(spark, datasourcePath) + .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json")) + .union(doajHostedByDataset(spark, workingDirPath + 
"/doaj.json")) + .flatMap(hbi => toList(hbi)) + ) + .filter(hbi => hbi._2.id.startsWith("10|")) + .map(hbi => toHostedByMap(hbi))(Encoders.STRING) + .rdd + .saveAsTextFile(outputPath, classOf[GzipCodec]) + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala new file mode 100644 index 000000000..533948289 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -0,0 +1,101 @@ +package eu.dnetlib.dhp.oa.graph.raw + +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport +import eu.dnetlib.dhp.schema.common.ModelSupport +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.utils.DHPUtils +import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse +import org.slf4j.LoggerFactory + +import scala.collection.JavaConverters._ +import scala.io.Source + +object CopyHdfsOafSparkApplication { + + def main(args: Array[String]): Unit = { + val log = LoggerFactory.getLogger(getClass) + val conf = new SparkConf() + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json") + ) + .mkString + ) + parser.parseArgument(args) + + val spark = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")) + .getOrCreate() + + val sc: SparkContext = spark.sparkContext + + val mdstoreManagerUrl = parser.get("mdstoreManagerUrl") + log.info("mdstoreManagerUrl: {}", mdstoreManagerUrl) + + val mdFormat = 
parser.get("mdFormat") + log.info("mdFormat: {}", mdFormat) + + val mdLayout = parser.get("mdLayout") + log.info("mdLayout: {}", mdLayout) + + val mdInterpretation = parser.get("mdInterpretation") + log.info("mdInterpretation: {}", mdInterpretation) + + val hdfsPath = parser.get("hdfsPath") + log.info("hdfsPath: {}", hdfsPath) + + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + + val paths = + DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala + + val validPaths: List[String] = + paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList + + val types = ModelSupport.oafTypes.entrySet.asScala + .map(e => Tuple2(e.getKey, e.getValue)) + + if (validPaths.nonEmpty) { + val oaf = spark.read.textFile(validPaths: _*) + val mapper = + new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + + types.foreach(t => + oaf + .filter(o => isOafType(o, t._1)) + .map(j => mapper.readValue(j, t._2).asInstanceOf[Oaf]) + .map(s => mapper.writeValueAsString(s))(Encoders.STRING) + .write + .option("compression", "gzip") + .mode(SaveMode.Append) + .text(s"$hdfsPath/${t._1}") + ) + } + } + + def isOafType(input: String, oafType: String): Boolean = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(input) + if (oafType == "relation") { + val hasSource = (json \ "source").extractOrElse[String](null) + val hasTarget = (json \ "target").extractOrElse[String](null) + + hasSource != null && hasTarget != null + } else { + val hasId = (json \ "id").extractOrElse[String](null) + val resultType = (json \ "resulttype" \ "classid").extractOrElse[String](null) + hasId != null && oafType.equalsIgnoreCase(resultType) + } + + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala 
b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala new file mode 100644 index 000000000..f5a13e72b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -0,0 +1,140 @@ +package eu.dnetlib.dhp.oa.graph.resolution + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.common.EntityType +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} +import org.apache.commons.io.IOUtils +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.slf4j.{Logger, LoggerFactory} + +object SparkResolveEntities { + + val mapper = new ObjectMapper() + + val entities = List( + EntityType.dataset, + EntityType.publication, + EntityType.software, + EntityType.otherresearchproduct + ) + + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json" + ) + ) + ) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")) + .getOrCreate() + + val graphBasePath = parser.get("graphBasePath") + log.info(s"graphBasePath -> $graphBasePath") + val workingPath = parser.get("workingPath") + log.info(s"workingPath -> $workingPath") + val unresolvedPath = parser.get("unresolvedPath") + log.info(s"unresolvedPath -> $unresolvedPath") + + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) + fs.mkdirs(new Path(workingPath)) + + resolveEntities(spark, workingPath, 
unresolvedPath) + generateResolvedEntities(spark, workingPath, graphBasePath, targetPath) + } + + def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + import spark.implicits._ + + val rPid: Dataset[(String, String)] = + spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] + val up: Dataset[(String, Result)] = spark.read + .text(unresolvedPath) + .as[String] + .map(s => mapper.readValue(s, classOf[Result])) + .map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) + + rPid + .joinWith(up, rPid("_2").equalTo(up("_1")), "inner") + .map { r => + val result = r._2._2 + val dnetId = r._1._1 + result.setId(dnetId) + result + } + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/resolvedEntities") + } + + def deserializeObject(input: String, entity: EntityType): Result = { + + entity match { + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software => mapper.readValue(input, classOf[Software]) + case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct]) + } + } + + def generateResolvedEntities( + spark: SparkSession, + workingPath: String, + graphBasePath: String, + targetPath: String + ) = { + + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + import spark.implicits._ + + val re: Dataset[(String, Result)] = spark.read + .load(s"$workingPath/resolvedEntities") + .as[Result] + .map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) + entities.foreach { e => + { + + val currentEntityDataset: Dataset[(String, Result)] = spark.read + .text(s"$graphBasePath/$e") + .as[String] + .map(s => deserializeObject(s, e)) + .map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) + + currentEntityDataset + .joinWith(re, 
currentEntityDataset("_1").equalTo(re("_1")), "left") + .map(k => { + + val a = k._1 + val b = k._2 + if (b == null) + a._2 + else { + a._2.mergeFrom(b._2) + a._2 + } + }) + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(s"$targetPath/$e") + } + + } + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala similarity index 68% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index cd517dd5e..2567a30a6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.oaf.{Relation, Result} +import eu.dnetlib.dhp.schema.oaf.Relation import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileSystem, Path} @@ -17,28 +17,37 @@ import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} object SparkResolveRelation { + def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json"))) + val parser = new ArgumentApplicationParser( + 
IOUtils.toString( + getClass.getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json" + ) + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - + .master(parser.get("master")) + .getOrCreate() val graphBasePath = parser.get("graphBasePath") log.info(s"graphBasePath -> $graphBasePath") val workingPath = parser.get("workingPath") log.info(s"workingPath -> $workingPath") + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) import spark.implicits._ - //CLEANING TEMPORARY FOLDER HdfsSupport.remove(workingPath, spark.sparkContext.hadoopConfiguration) val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) @@ -48,85 +57,86 @@ object SparkResolveRelation { val mapper: ObjectMapper = new ObjectMapper() - val rPid: Dataset[(String, String)] = spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] + val rPid: Dataset[(String, String)] = + spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] - val relationDs: Dataset[(String, Relation)] = spark.read.text(s"$graphBasePath/relation").as[String] - .map(s => mapper.readValue(s, classOf[Relation])).as[Relation] + val relationDs: Dataset[(String, Relation)] = spark.read + .text(s"$graphBasePath/relation") + .as[String] + .map(s => mapper.readValue(s, classOf[Relation])) + .as[Relation] .map(r => (r.getSource.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - relationDs.joinWith(rPid, relationDs("_1").equalTo(rPid("_2")), "left").map { - m => + relationDs + .joinWith(rPid, relationDs("_1").equalTo(rPid("_2")), "left") + .map { m => val sourceResolved = m._2 val currentRelation = m._1._2 if (sourceResolved != null && sourceResolved._1 != null && sourceResolved._1.nonEmpty) 
currentRelation.setSource(sourceResolved._1) currentRelation - }.write + } + .write .mode(SaveMode.Overwrite) .save(s"$workingPath/relationResolvedSource") - - val relationSourceResolved: Dataset[(String, Relation)] = spark.read.load(s"$workingPath/relationResolvedSource").as[Relation] + val relationSourceResolved: Dataset[(String, Relation)] = spark.read + .load(s"$workingPath/relationResolvedSource") + .as[Relation] .map(r => (r.getTarget.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - relationSourceResolved.joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_2")), "left").map { - m => + relationSourceResolved + .joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_2")), "left") + .map { m => val targetResolved = m._2 val currentRelation = m._1._2 if (targetResolved != null && targetResolved._1.nonEmpty) currentRelation.setTarget(targetResolved._1) currentRelation - } + } .write .mode(SaveMode.Overwrite) .save(s"$workingPath/relation_resolved") - - // TO BE conservative we keep the original relation in the working dir - // and save the relation resolved on the graphBasePath - //In future this two line of code should be removed - - fs.rename(new Path(s"$graphBasePath/relation"), new Path(s"$workingPath/relation")) - - spark.read.load(s"$workingPath/relation_resolved").as[Relation] + spark.read + .load(s"$workingPath/relation_resolved") + .as[Relation] .filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")) .map(r => mapper.writeValueAsString(r)) .write .option("compression", "gzip") .mode(SaveMode.Overwrite) - .text(s"$graphBasePath/relation") + .text(s"$targetPath/relation") } def extractInstanceCF(input: String): List[(String, String)] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val result: List[(String, String)] = for { - JObject(iObj) <- json \ "instance" - JField("collectedfrom", JObject(cf)) <- iObj + JObject(iObj) 
<- json \ "instance" + JField("collectedfrom", JObject(cf)) <- iObj JField("instancetype", JObject(instancetype)) <- iObj - JField("value", JString(collectedFrom)) <- cf - JField("classname", JString(classname)) <- instancetype + JField("value", JString(collectedFrom)) <- cf + JField("classname", JString(classname)) <- instancetype } yield (classname, collectedFrom) result } - def extractPidsFromRecord(input: String): (String, List[(String, String)]) = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val id: String = (json \ "id").extract[String] val result: List[(String, String)] = for { - JObject(pids) <- json \\ "instance" \ "pid" - JField("value", JString(pidValue)) <- pids + JObject(pids) <- json \\ "instance" \ "pid" + JField("value", JString(pidValue)) <- pids JField("qualifier", JObject(qualifier)) <- pids - JField("classid", JString(pidType)) <- qualifier + JField("classid", JString(pidType)) <- qualifier } yield (pidValue, pidType) (id, result) } - private def isRelation(input: String): Boolean = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -136,20 +146,25 @@ object SparkResolveRelation { source != null } - def extractPidResolvedTableFromJsonRDD(spark: SparkSession, graphPath: String, workingPath: String) = { + def extractPidResolvedTableFromJsonRDD( + spark: SparkSession, + graphPath: String, + workingPath: String + ) = { import spark.implicits._ - val d: RDD[(String, String)] = spark.sparkContext.textFile(s"$graphPath/*") + val d: RDD[(String, String)] = spark.sparkContext + .textFile(s"$graphPath/*") .filter(i => !isRelation(i)) .map(i => extractPidsFromRecord(i)) .filter(s => s != null && s._1 != null && s._2 != null && s._2.nonEmpty) .flatMap { p => - p._2.map(pid => - (p._1, DHPUtils.generateUnresolvedIdentifier(pid._1, pid._2)) - ) - }.filter(r => r._1 != null || r._2 != null) + p._2.map(pid => (p._1, 
DHPUtils.generateUnresolvedIdentifier(pid._1, pid._2))) + } + .filter(r => r._1 != null || r._2 != null) - spark.createDataset(d) + spark + .createDataset(d) .groupByKey(_._2) .reduceGroups((x, y) => if (x._1.startsWith("50|doi") || x._1.startsWith("50|pmid")) x else y) .map(s => s._2) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala similarity index 70% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala index 9e905d806..79b1c22cd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/sx/graphimport/SparkDataciteToOAF.scala @@ -7,25 +7,26 @@ import org.apache.spark.sql.SparkSession object SparkDataciteToOAF { - def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/datacite_to_df_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/datacite_to_df_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - import spark.implicits._ - + .master(parser.get("master")) + .getOrCreate() val sc = spark.sparkContext val inputPath = parser.get("inputPath") - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala 
b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala similarity index 56% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala index 3ee0c7dd6..fb90531c5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.Result import org.apache.commons.io.IOUtils import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf @@ -11,31 +11,39 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertDatasetToJsonRDD { - def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - val resultObject 
= List("publication","dataset","software", "otherResearchProduct") + val resultObject = List("publication", "dataset", "software", "otherResearchProduct") val mapper = new ObjectMapper() - implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - - resultObject.foreach{item => - spark.read.load(s"$sourcePath/$item").as[Result].map(r=> mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) + resultObject.foreach { item => + spark.read + .load(s"$sourcePath/$item") + .as[Result] + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .rdd + .saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala similarity index 60% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala index 846ac37af..bfa07eb69 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertObjectToJson.scala @@ -5,24 +5,29 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import org.apache.commons.io.IOUtils +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import org.apache.hadoop.io.compress._ object SparkConvertObjectToJson { def main(args: 
Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_object_json_params.json") + ) + ) parser.parseArgument(args) val spark: SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") @@ -30,11 +35,11 @@ object SparkConvertObjectToJson { log.info(s"targetPath -> $targetPath") val objectType = parser.get("objectType") log.info(s"objectType -> $objectType") + val scholixUpdatePath = parser.get("scholixUpdatePath") + log.info(s"scholixUpdatePath -> $scholixUpdatePath") - - implicit val scholixEncoder :Encoder[Scholix]= Encoders.kryo[Scholix] - implicit val summaryEncoder :Encoder[ScholixSummary]= Encoders.kryo[ScholixSummary] - + implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] val mapper = new ObjectMapper @@ -42,11 +47,19 @@ object SparkConvertObjectToJson { case "scholix" => log.info("Serialize Scholix") val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix] - d.map(s => mapper.writeValueAsString(s))(Encoders.STRING).rdd.repartition(6000).saveAsTextFile(targetPath, classOf[GzipCodec]) + val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix] + d.union(u) + .repartition(8000) + .map(s => mapper.writeValueAsString(s))(Encoders.STRING) + .rdd + .saveAsTextFile(targetPath, classOf[GzipCodec]) case "summary" => log.info("Serialize Summary") val d: Dataset[ScholixSummary] = 
/** Spark job that turns the JSON text dump of the graph into kryo-encoded Spark datasets.
  *
  * For each of `dataset`, `publication`, `software` and `otherresearchproduct` the job reads
  * `$sourcePath/<type>` line by line, deserializes every line with Jackson, keeps only the
  * records that carry a `dataInfo` and are not flagged as deleted by inference, and overwrites
  * `$targetPath/entities/<type>`.
  *
  * Relations are read from `$sourcePath/relation`, filtered with the same `dataInfo` rule,
  * restricted to those whose source and target ids both start with `"50"`, stripped of the
  * relation classes listed in the exclusion list (case-insensitive match), and written to
  * `$targetPath/relation`.
  */
object SparkConvertRDDtoDataset {

  def main(args: Array[String]): Unit = {

    val log: Logger = LoggerFactory.getLogger(getClass)
    val conf: SparkConf = new SparkConf()

    // The accepted command line arguments are described by a JSON resource on the classpath.
    val paramsJson = IOUtils.toString(
      getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json")
    )
    val parser = new ArgumentApplicationParser(paramsJson)
    parser.parseArgument(args)

    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      .appName(getClass.getSimpleName)
      .master(parser.get("master"))
      .getOrCreate()

    val sourcePath = parser.get("sourcePath")
    log.info(s"sourcePath -> $sourcePath")
    val targetPath = parser.get("targetPath")
    log.info(s"targetPath -> $targetPath")

    val entityPath = s"$targetPath/entities"
    val relPath = s"$targetPath/relation"
    val jsonMapper = new ObjectMapper()

    // Kryo encoders for every type that is materialized as a Dataset below.
    implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset])
    implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication])
    implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation])
    implicit val orpEncoder: Encoder[OtherResearchProduct] =
      Encoders.kryo(classOf[OtherResearchProduct])
    implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software])

    // NOTE: `== false` is intentional in every filter below: it evaluates to false for a
    // null flag instead of failing while unboxing it.

    log.info("Converting dataset")
    val datasets = spark.sparkContext
      .textFile(s"$sourcePath/dataset")
      .map(line => jsonMapper.readValue(line, classOf[OafDataset]))
      .filter(d => d.getDataInfo != null && d.getDataInfo.getDeletedbyinference == false)
    val datasetDS = spark.createDataset(datasets).as[OafDataset]
    datasetDS.write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset")

    log.info("Converting publication")
    val publications = spark.sparkContext
      .textFile(s"$sourcePath/publication")
      .map(line => jsonMapper.readValue(line, classOf[Publication]))
      .filter(p => p.getDataInfo != null && p.getDataInfo.getDeletedbyinference == false)
    val publicationDS = spark.createDataset(publications).as[Publication]
    publicationDS.write.mode(SaveMode.Overwrite).save(s"$entityPath/publication")

    log.info("Converting software")
    val softwares = spark.sparkContext
      .textFile(s"$sourcePath/software")
      .map(line => jsonMapper.readValue(line, classOf[Software]))
      .filter(sw => sw.getDataInfo != null && sw.getDataInfo.getDeletedbyinference == false)
    val softwareDS = spark.createDataset(softwares).as[Software]
    softwareDS.write.mode(SaveMode.Overwrite).save(s"$entityPath/software")

    log.info("Converting otherresearchproduct")
    val otherProducts = spark.sparkContext
      .textFile(s"$sourcePath/otherresearchproduct")
      .map(line => jsonMapper.readValue(line, classOf[OtherResearchProduct]))
      .filter(o => o.getDataInfo != null && o.getDataInfo.getDeletedbyinference == false)
    val otherProductDS = spark.createDataset(otherProducts).as[OtherResearchProduct]
    otherProductDS.write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct")

    log.info("Converting Relation")

    // Relation classes that must not reach the output (compared ignoring case).
    val excludedRelClasses = List(
      "cites",
      "iscitedby",
      "merges",
      "ismergedin",
      "HasAmongTopNSimilarDocuments",
      "IsAmongTopNSimilarDocuments"
    )

    // The three original predicates are evaluated in the same order, short-circuiting.
    val relations = spark.sparkContext
      .textFile(s"$sourcePath/relation")
      .map(line => jsonMapper.readValue(line, classOf[Relation]))
      .filter { rel =>
        rel.getDataInfo != null && rel.getDataInfo.getDeletedbyinference == false &&
        rel.getSource.startsWith("50") && rel.getTarget.startsWith("50") &&
        !excludedRelClasses.exists(_.equalsIgnoreCase(rel.getRelClass))
      }
    val relationDS = spark.createDataset(relations).as[Relation]
    relationDS.write.mode(SaveMode.Overwrite).save(relPath)

  }
}
classOf[Publication]), + ("dataset", classOf[OafDataset]), + ("software", classOf[Software]), + ("otherResearchProduct", classOf[OtherResearchProduct]) + ) + + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = + Encoders.kryo(classOf[OtherResearchProduct]) + implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + val oafDs: Dataset[Oaf] = spark.read.load(s"$sourcePath/*").as[Oaf] + + log.info("Extract Publication") + oafDs + .filter(o => o.isInstanceOf[Publication]) + .map(p => p.asInstanceOf[Publication]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/publication") + + log.info("Extract dataset") + oafDs + .filter(o => o.isInstanceOf[OafDataset]) + .map(p => p.asInstanceOf[OafDataset]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/dataset") + + log.info("Extract software") + oafDs + .filter(o => o.isInstanceOf[Software]) + .map(p => p.asInstanceOf[Software]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/software") + + log.info("Extract otherResearchProduct") + oafDs + .filter(o => o.isInstanceOf[OtherResearchProduct]) + .map(p => p.asInstanceOf[OtherResearchProduct]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/otherResearchProduct") + + log.info("Extract Relation") + oafDs + .filter(o => o.isInstanceOf[Relation]) + .map(p => p.asInstanceOf[Relation]) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/extracted/relation") + + 
/** Writes the records of `oafDs` that are instances of `clazz` to `targetPath`,
  * overwriting any previous content.
  *
  * The runtime class is used for both the test and the cast: `T` is erased at runtime, so
  * `isInstanceOf[T]` / `asInstanceOf[T]` would only check against the upper bound `Oaf`
  * and let every record through.
  *
  * @param oafDs      the heterogeneous dataset to select from
  * @param targetPath location where the selected records are saved
  * @param clazz      runtime class of the records to keep; also used to build the kryo encoder
  * @param log        logger used to report which type is being extracted
  */
def extractEntities[T <: Oaf](
  oafDs: Dataset[Oaf],
  targetPath: String,
  clazz: Class[T],
  log: Logger
): Unit = {

  implicit val resEncoder: Encoder[T] = Encoders.kryo(clazz)
  log.info(s"Extract ${clazz.getSimpleName}")
  oafDs
    .filter(o => clazz.isInstance(o))
    .map(p => clazz.cast(p))
    .write
    .mode(SaveMode.Overwrite)
    .save(targetPath)
}
Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/create_scholix_params.json") + ) + ) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")) + .getOrCreate() + + val relationPath = parser.get("relationPath") + log.info(s"relationPath -> $relationPath") + val summaryPath = parser.get("summaryPath") + log.info(s"summaryPath -> $summaryPath") + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + implicit val relEncoder: Encoder[Relation] = Encoders.kryo[Relation] + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix] + + import spark.implicits._ + + val relationDS: Dataset[(String, Relation)] = spark.read + .load(relationPath) + .as[Relation] + .filter(r => + (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase + .contains("merge") + ) + .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) + + val summaryDS: Dataset[(String, ScholixSummary)] = spark.read + .load(summaryPath) + .as[ScholixSummary] + .map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, summaryEncoder)) + + relationDS + .joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") + .map { input: ((String, Relation), (String, ScholixSummary)) => + if (input._1 != null && input._2 != null) { + val rel: Relation = input._1._2 + val source: ScholixSummary = input._2._2 + (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) + } else null + }(Encoders.tuple(Encoders.STRING, scholixEncoder)) + .filter(r => r != null) + .write + .mode(SaveMode.Overwrite) + 
.save(s"$targetPath/scholix_from_source") + + val scholixSource: Dataset[(String, Scholix)] = spark.read + .load(s"$targetPath/scholix_from_source") + .as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) + + scholixSource + .joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left") + .map { input: ((String, Scholix), (String, ScholixSummary)) => + if (input._2 == null) { + null + } else { + val s: Scholix = input._1._2 + val target: ScholixSummary = input._2._2 + ScholixUtils.generateCompleteScholix(s, target) + } + } + .filter(s => s != null) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/scholix_one_verse") + + val scholix_o_v: Dataset[Scholix] = + spark.read.load(s"$targetPath/scholix_one_verse").as[Scholix] + + scholix_o_v + .flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s))) + .as[Scholix] + .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, scholixEncoder)) + .groupByKey(_._1) + .agg(ScholixUtils.scholixAggregator.toColumn) + .map(s => s._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/scholix") + + val scholix_final: Dataset[Scholix] = spark.read.load(s"$targetPath/scholix").as[Scholix] + + val stats: Dataset[(String, String, Long)] = scholix_final + .map(s => (s.getSource.getDnetIdentifier, s.getTarget.getObjectType)) + .groupBy("_1", "_2") + .agg(count("_1")) + .as[(String, String, Long)] + + stats + .map(s => + RelatedEntities( + s._1, + if ("dataset".equalsIgnoreCase(s._2)) s._3 else 0, + if ("publication".equalsIgnoreCase(s._2)) s._3 else 0 + ) + ) + .groupByKey(_.id) + .reduceGroups((a, b) => + RelatedEntities( + a.id, + a.relatedDataset + b.relatedDataset, + a.relatedPublication + b.relatedPublication + ) + ) + .map(_._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$targetPath/related_entities") + + val relatedEntitiesDS: Dataset[RelatedEntities] = spark.read + .load(s"$targetPath/related_entities") + .as[RelatedEntities] + .filter(r => r.relatedPublication > 
0 || r.relatedDataset > 0) + + relatedEntitiesDS + .joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner") + .map { i => + val re = i._1 + val sum = i._2._2 + + sum.setRelatedDatasets(re.relatedDataset) + sum.setRelatedPublications(re.relatedPublication) + sum + } + .write + .mode(SaveMode.Overwrite) + .save(s"${summaryPath}_filtered") + + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala similarity index 51% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala rename to dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index 0970375f5..6d489e8cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkCreateSummaryObject { @@ -14,29 +14,41 @@ object SparkCreateSummaryObject { def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/create_summaries_params.json"))) + val parser = new ArgumentApplicationParser( + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/create_summaries_params.json") + ) + ) parser.parseArgument(args) val spark: 
SparkSession = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] - implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] + val ds: Dataset[Result] = spark.read + .load(s"$sourcePath/*") + .as[Result] + .filter(r => r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) - val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r=>r.getDataInfo== null || r.getDataInfo.getDeletedbyinference== false) - - ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) + ds.repartition(6000) + .map(r => ScholixUtils.resultToSummary(r)) + .filter(s => s != null) + .write + .mode(SaveMode.Overwrite) + .save(targetPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala new file mode 100644 index 000000000..23f4da6c7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/PangaeaUtils.scala @@ -0,0 +1,136 @@ +package eu.dnetlib.dhp.sx.graph.pangaea + +import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.{Encoder, Encoders} +import org.json4s +import org.json4s.DefaultFormats +import 
/** Helpers to turn Pangaea records (a JSON envelope carrying an XML payload) into
  * [[PangaeaDataModel]] instances and to de-duplicate them by identifier.
  */
object PangaeaUtils {

  /** DOI matcher, compiled once instead of on every call of [[findDOIInRelation]]. */
  private val DoiPattern: Pattern =
    Pattern.compile("\\b(10[.][0-9]{4,}(?:[.][0-9]+)*\\/(?:(?![\"&\\'<>])\\S)+)\\b")

  /** Parses the JSON record, reads its `xml` field as a string and converts that XML
    * document with [[parseXml]].
    *
    * @param input a JSON document with a string field named `xml`
    * @return the model built from the embedded XML
    */
  def toDataset(input: String): PangaeaDataModel = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: json4s.JValue = parse(input)
    val xml = (json \ "xml").extract[String]
    parseXml(xml)
  }

  /** Extracts the first DOI found in each string of `input`.
    *
    * Strings without a DOI are dropped; null strings are skipped as well instead of
    * failing inside the regex matcher.
    *
    * @param input the relation strings to scan
    * @return the first DOI match of every string that contains one, in input order
    */
  def findDOIInRelation(input: List[String]): List[String] =
    input.flatMap { relation =>
      if (relation == null) None
      else {
        val matcher = DoiPattern.matcher(relation)
        if (matcher.find()) Some(matcher.group(0)) else None
      }
    }

  /** Returns the text of the first value of `attribute` on `node`, if the attribute is present. */
  def attributeOpt(attribute: String, node: Node): Option[String] =
    node.attribute(attribute) flatMap (_.headOption) map (_.text)

  /** Collects `(type, text)` pairs from the `linkage` children of `node`; children
    * without a `type` attribute are ignored.
    */
  def extractLinkage(node: Elem): List[(String, String)] = {
    (node \ "linkage")
      .map(n => (attributeOpt("type", n), n.text))
      .filter(t => t._1.isDefined)
      .map(t => (t._1.get, t._2))(collection.breakOut)
  }

  /** Builds a [[PangaeaDataModel]] from an XML document.
    *
    * Repeatable elements are collected as lists of their text; single-valued elements are
    * read as the concatenated text of the matching children. The `relation` values are
    * reduced to the DOIs they contain.
    *
    * @param input the XML document as a string
    */
  def parseXml(input: String): PangaeaDataModel = {
    val xml = XML.loadString(input)

    val identifier = (xml \ "identifier").text
    val title: List[String] = (xml \ "title").map(n => n.text)(collection.breakOut)
    val pType: List[String] = (xml \ "type").map(n => n.text)(collection.breakOut)
    val creators: List[String] = (xml \ "creator").map(n => n.text)(collection.breakOut)
    val publisher: List[String] = (xml \ "publisher").map(n => n.text)(collection.breakOut)
    val dataCenter: List[String] = (xml \ "dataCenter").map(n => n.text)(collection.breakOut)
    val subject: List[String] = (xml \ "subject").map(n => n.text)(collection.breakOut)
    val language = (xml \ "language").text
    val rights = (xml \ "rights").text
    val parentIdentifier = (xml \ "parentIdentifier").text
    val relation: List[String] = (xml \ "relation").map(n => n.text)(collection.breakOut)
    val relationFiltered = findDOIInRelation(relation)
    val linkage: List[(String, String)] = extractLinkage(xml)

    PangaeaDataModel(
      identifier,
      title,
      pType,
      creators,
      publisher,
      dataCenter,
      subject,
      language,
      rights,
      parentIdentifier,
      relationFiltered,
      linkage
    )
  }

  /** Aggregator that keeps one [[PangaeaDataModel]] per group.
    *
    * The current buffer wins when it has a non-empty title; otherwise the other candidate
    * replaces it. A null on either side yields the other side.
    */
  def getDatasetAggregator(): Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] =
    new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] {

      override def zero: PangaeaDataModel = null

      // The incoming pair is null-checked first: it must not be dereferenced while the
      // buffer is still the null `zero`.
      override def reduce(b: PangaeaDataModel, a: (String, PangaeaDataModel)): PangaeaDataModel =
        if (a == null) b
        else if (b == null) a._2
        else if (b.title != null && b.title.nonEmpty) b
        else a._2

      override def merge(b1: PangaeaDataModel, b2: PangaeaDataModel): PangaeaDataModel =
        if (b1 == null) b2
        else if (b2 == null) b1
        else if (b1.title != null && b1.title.nonEmpty) b1
        else b2

      override def finish(reduction: PangaeaDataModel): PangaeaDataModel = reduction

      override def bufferEncoder: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel]

      override def outputEncoder: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel]
    }

}
dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala index 79c75d6df..8ff8a8b1a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala @@ -2,51 +2,57 @@ package eu.dnetlib.dhp.sx.graph.pangaea import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.{Logger, LoggerFactory} - import scala.collection.JavaConverters._ + import scala.io.Source object SparkGeneratePanagaeaDataset { - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/pangaea/pangaea_to_dataset.json")).mkString) + val parser = new ArgumentApplicationParser( + Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/pangaea/pangaea_to_dataset.json") + ) + .mkString + ) parser.parseArgument(args) - val spark: SparkSession = SparkSession .builder() .config(conf) .appName(SparkGeneratePanagaeaDataset.getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() + .master(parser.get("master")) + .getOrCreate() parser.getObjectMap.asScala.foreach(s => logger.info(s"${s._1} -> ${s._2}")) logger.info("Converting sequential file into Dataset") - val sc:SparkContext = spark.sparkContext + val sc: SparkContext = spark.sparkContext - val workingPath:String = parser.get("workingPath") + val workingPath: String = parser.get("workingPath") implicit val pangaeaEncoders: Encoder[PangaeaDataModel] = Encoders.kryo[PangaeaDataModel] - 
val inputRDD:RDD[PangaeaDataModel] = sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) + val inputRDD: RDD[PangaeaDataModel] = + sc.textFile(s"$workingPath/update").map(s => PangaeaUtils.toDataset(s)) - spark.createDataset(inputRDD).as[PangaeaDataModel] - .map(s => (s.identifier,s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) - .groupByKey(_._1)(Encoders.STRING) + spark + .createDataset(inputRDD) + .as[PangaeaDataModel] + .map(s => (s.identifier, s))(Encoders.tuple(Encoders.STRING, pangaeaEncoders)) + .groupByKey(_._1)(Encoders.STRING) .agg(PangaeaUtils.getDatasetAggregator().toColumn) .map(s => s._2) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/dataset") + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/dataset") } - - - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index aa9535ef7..7c39efb40 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -12,6 +12,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MappableBlock; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -66,9 +68,59 @@ public class GraphCleaningFunctionsTest { Relation r_out = OafCleaner.apply(r_in, mapping); assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass())); assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType())); + + assertEquals("iis", 
r_out.getDataInfo().getProvenanceaction().getClassid()); + assertEquals("Inferred by OpenAIRE", r_out.getDataInfo().getProvenanceaction().getClassname()); } } + @Test + void testFilter_invisible_true() throws Exception { + + assertNotNull(vocabularies); + assertNotNull(mapping); + + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + + assertTrue(p_in instanceof Result); + assertTrue(p_in instanceof Publication); + + assertEquals(true, GraphCleaningFunctions.filter(p_in)); + } + + @Test + void testFilter_true_nothing_to_filter() throws Exception { + + assertNotNull(vocabularies); + assertNotNull(mapping); + + String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + + assertTrue(p_in instanceof Result); + assertTrue(p_in instanceof Publication); + + assertEquals(true, GraphCleaningFunctions.filter(p_in)); + } + + @Test + void testFilter_missing_invisible() throws Exception { + + assertNotNull(vocabularies); + assertNotNull(mapping); + + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + + assertTrue(p_in instanceof Result); + assertTrue(p_in instanceof Publication); + + assertEquals(true, GraphCleaningFunctions.filter(p_in)); + } + @Test void testCleaning() throws Exception { @@ -99,6 +151,12 @@ public class GraphCleaningFunctionsTest { assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid()); assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname()); + assertEquals("0027", p_out.getInstance().get(1).getInstancetype().getClassid()); + assertEquals("Model", p_out.getInstance().get(1).getInstancetype().getClassname()); + 
+ assertEquals("xyz", p_out.getInstance().get(2).getInstancetype().getClassid()); + assertEquals("xyz", p_out.getInstance().get(2).getInstancetype().getClassname()); + assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid()); assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname()); @@ -112,7 +170,7 @@ public class GraphCleaningFunctionsTest { List poi = p_out.getInstance(); assertNotNull(poi); - assertEquals(1, poi.size()); + assertEquals(3, poi.size()); final Instance poii = poi.get(0); assertNotNull(poii); @@ -140,7 +198,7 @@ public class GraphCleaningFunctionsTest { assertEquals(5, p_out.getTitle().size()); - Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out); + Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies); assertEquals(3, p_cleaned.getTitle().size()); @@ -159,9 +217,12 @@ public class GraphCleaningFunctionsTest { assertEquals("1970-10-07", p_cleaned.getDateofacceptance().getValue()); + assertEquals("0038", p_cleaned.getInstance().get(2).getInstancetype().getClassid()); + assertEquals("Other literature type", p_cleaned.getInstance().get(2).getInstancetype().getClassname()); + final List pci = p_cleaned.getInstance(); assertNotNull(pci); - assertEquals(1, pci.size()); + assertEquals(3, pci.size()); final Instance pcii = pci.get(0); assertNotNull(pcii); @@ -222,4 +283,27 @@ public class GraphCleaningFunctionsTest { .readLines( GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); } + + @Test + public void testCleanDoiBoost() throws IOException { + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); + Publication cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies); + + 
Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned)); + } + + @Test + public void testCleanDoiBoost2() throws IOException { + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); + Publication cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies); + + Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned)); + + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 602aaf6e6..7300ce860 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -171,24 +171,6 @@ public class DumpJobTest { GraphResult gr = verificationDataset.first(); - Assertions.assertEquals(2, gr.getMeasures().size()); - Assertions - .assertTrue( - gr - .getMeasures() - .stream() - .anyMatch( - m -> m.getKey().equals("influence") - && m.getValue().equals("1.62759106106e-08"))); - Assertions - .assertTrue( - gr - .getMeasures() - .stream() - .anyMatch( - m -> m.getKey().equals("popularity") - && m.getValue().equals("0.22519296"))); - Assertions.assertEquals(6, gr.getAuthor().size()); Assertions .assertTrue( @@ -357,11 +339,10 @@ public class DumpJobTest { Assertions.assertEquals("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2", gr.getId()); - Assertions.assertEquals(2, gr.getOriginalId().size()); + Assertions.assertEquals(1, gr.getOriginalId().size()); Assertions .assertTrue( - gr.getOriginalId().contains("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2") - && gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); + 
gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); Assertions.assertEquals(1, gr.getPid().size()); Assertions @@ -910,7 +891,6 @@ public class DumpJobTest { DumpProducts dump = new DumpProducts(); dump .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); @@ -942,6 +922,46 @@ public class DumpJobTest { Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); + temp = spark + .sql( + "select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " + + "from check " + + "lateral view explode (instance) i as inst " + + "where inst.articleprocessingcharge is not null"); + + Assertions + .assertEquals( + "3131.64", + temp + .filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'") + .collectAsList() + .get(0) + .getString(1)); + Assertions + .assertEquals( + "EUR", + temp + .filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'") + .collectAsList() + .get(0) + .getString(2)); + + Assertions + .assertEquals( + "2578.35", + temp + .filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'") + .collectAsList() + .get(0) + .getString(1)); + Assertions + .assertEquals( + "EUR", + temp + .filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'") + .collectAsList() + .get(0) + .getString(2)); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index 697ec705f..2cc53027e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -9,7 +9,7 @@ import 
com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*; -@Disabled +//@Disabled class GenerateJsonSchema { @Test @@ -21,7 +21,7 @@ class GenerateJsonSchema { configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName()); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(Relation.class); + JsonNode jsonSchema = generator.generateSchema(GraphResult.class); System.out.println(jsonSchema.toString()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index fe178795d..763318ae4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -81,8 +81,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); -// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc @@ -144,8 +142,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); -// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc @@ -203,4 +199,107 @@ public class DumpRelationTest { "and validationDate = '2021-08-06'") .count()); } + + @Test + public void test3() throws Exception {// + final String 
sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); + + SparkDumpRelationJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDataset.createOrReplaceTempView("table"); + + verificationDataset + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset check = spark + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); + + Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); + Assertions + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); + + Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); + + Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count()); + Assertions + .assertEquals( + 1, check + .filter( + "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + + "and provenance = 'Inferred by OpenAIRE'") + .count()); + } + + @Test + public void test4() throws Exception {// + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); + + SparkDumpRelationJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", 
sourcePath, + "-removeSet", "isParticipant;isAuthorInstitutionOf" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDataset.createOrReplaceTempView("table"); + + verificationDataset + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset check = spark + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); + + Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); + Assertions + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); + + Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); + + Assertions.assertEquals(0, check.filter("name = 'isAuthorInstitutionOf'").count()); + + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java index e43383ef4..27573bb32 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java @@ -9,6 +9,7 @@ import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; import 
org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; @@ -96,6 +97,76 @@ public class ExtractRelationFromEntityTest { 9, verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daxab3685c3'").count()); + Assertions + .assertEquals( + "IsRelatedTo", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getReltype() + .getName()); + + Assertions + .assertEquals( + "relationship", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getReltype() + .getType()); + + Assertions + .assertEquals( + "context", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getSource() + .getType()); + + Assertions + .assertEquals( + "result", verificationDataset + .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) + .collectAsList() + .get(0) + .getTarget() + .getType()); + Assertions + .assertEquals( + "IsRelatedTo", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getReltype() + .getName()); + + Assertions + .assertEquals( + "relationship", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getReltype() + .getType()); + + Assertions + .assertEquals( + "context", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getTarget() + .getType()); + + Assertions + .assertEquals( + "result", verificationDataset + .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) + .collectAsList() + .get(0) + .getSource() + .getType()); } } diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java new file mode 100644 index 000000000..3bd1c13de --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -0,0 +1,144 @@ + +package eu.dnetlib.dhp.oa.graph.group; + +import static org.junit.jupiter.api.Assertions.*; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Lists; + +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob; +import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.DHPUtils; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class GroupEntitiesSparkJobTest { + + private static SparkSession spark; + + private static ObjectMapper mapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + private static Path workingDir; + private Path dataInputPath; + + private Path groupEntityPath; + private Path dispatchEntityPath; + + @BeforeAll + public static void beforeAll() 
throws IOException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + + SparkConf conf = new SparkConf(); + conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); + conf.setMaster("local"); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); + spark = SparkSession.builder().config(conf).getOrCreate(); + } + + @BeforeEach + public void beforeEach() throws IOException, URISyntaxException { + dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + groupEntityPath = workingDir.resolve("grouped_entity"); + dispatchEntityPath = workingDir.resolve("dispatched_entity"); + } + + @AfterAll + public static void afterAll() throws IOException { + spark.stop(); + FileUtils.deleteDirectory(workingDir.toFile()); + } + + @Test + @Order(1) + void testGroupEntities() throws Exception { + GroupEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-graphInputPath", + dataInputPath.toString(), + "-outputPath", + groupEntityPath.toString() + }); + + Dataset output = spark + .read() + .textFile(groupEntityPath.toString()) + .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + + assertEquals( + 1, + output + .filter( + (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" + .equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) + .count()); + } + + @Test + @Order(2) + void testDispatchEntities() throws Exception { + for (String type : Lists + .newArrayList( + Publication.class.getCanonicalName(), eu.dnetlib.dhp.schema.oaf.Dataset.class.getCanonicalName())) { + String directory = StringUtils.substringAfterLast(type, ".").toLowerCase(); + DispatchEntitiesSparkJob.main(new String[] 
{ + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + groupEntityPath.toString(), + "-outputPath", + dispatchEntityPath.resolve(directory).toString(), + "-graphTableClassName", + type + }); + } + + Dataset output = spark + .read() + .textFile( + DHPUtils + .toSeq( + HdfsSupport + .listFiles(dispatchEntityPath.toString(), spark.sparkContext().hadoopConfiguration()))) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + + assertEquals(3, output.count()); + assertEquals( + 2, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("publication")) + .count()); + assertEquals( + 1, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("dataset")) + .count()); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala deleted file mode 100644 index 4613d5636..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ /dev/null @@ -1,138 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostedbymap - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} -import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} -import org.junit.jupiter.api.Test - -class TestApply extends java.io.Serializable{ - - @Test - def testApplyOnResult (): Unit = { - val conf = new SparkConf() - conf.setMaster("local[*]") - conf.set("spark.driver.host", "localhost") - val spark: SparkSession = 
- SparkSession - .builder() - .appName(getClass.getSimpleName) - .config(conf) - .getOrCreate() - val pub = getClass.getResource("publication.json").getPath - val hbm = getClass.getResource("preparedInfo.json").getPath - - val mapper:ObjectMapper = new ObjectMapper() - - implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - implicit val mapEncoderPubInfo: Encoder[Publication] = Encoders.bean(classOf[Publication]) - - - val pub_ds :Dataset[Publication] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[Publication])) - val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) - - - assertEquals(13, pub_ds.count()) - - val ds:Dataset[Publication] = SparkApplyHostedByMapToResult.applyHBtoPubs(hbm_ds, pub_ds) - - assertEquals(13, ds.count) - - val temp: Dataset[(Publication, Publication)] = pub_ds.joinWith(ds, pub_ds.col("id").equalTo(ds.col("id")), "left") - assertEquals(13, temp.count()) - temp.foreach(t2 => { - val pb : Publication = t2._1 - val pa : Publication = t2._2 - assertEquals(1, pa.getInstance().size()) - assertEquals(1, pb.getInstance().size()) - assertTrue(t2._1.getId.equals(t2._2.getId)) - if(pb.getId.equals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9")){ - assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735")) - assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) - assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) - assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) - assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold)) - assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) - assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) - - - 
assertTrue(pb.getInstance().get(0).getHostedby.getKey.equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c")) - assertTrue(pb.getInstance().get(0).getHostedby.getValue.equals("Revistas de investigación Universidad Nacional Mayor de San Marcos")) - assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("not available")) - assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("UNKNOWN")) - assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) - assertTrue(pb.getBestaccessright.getClassid.equals("UNKNOWN")) - assertTrue(pb.getBestaccessright.getClassname.equals("not available")) - - }else{ - assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals(pb.getInstance().get(0).getHostedby.getKey)) - assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals(pb.getInstance().get(0).getHostedby.getValue)) - assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals(pb.getInstance().get(0).getAccessright.getClassid)) - assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals(pb.getInstance().get(0).getAccessright.getClassname)) - assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute == pb.getInstance().get(0).getAccessright.getOpenAccessRoute) - - } - }) - - spark.close() - } - - - @Test - def testApplyOnDatasource():Unit = { - val conf = new SparkConf() - conf.setMaster("local[*]") - conf.set("spark.driver.host", "localhost") - val spark: SparkSession = - SparkSession - .builder() - .appName(getClass.getSimpleName) - .config(conf) - .getOrCreate() - val dats = getClass.getResource("datasource.json").getPath - val hbm = getClass.getResource("preparedInfo2.json").getPath - - val mapper:ObjectMapper = new ObjectMapper() - - implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - implicit val mapEncoderPubInfo: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) - - - val dats_ds :Dataset[Datasource] = spark.read.textFile(dats).map(p => 
mapper.readValue(p, classOf[Datasource])) - val hbm_ds :Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo]))) - - - assertEquals(10, dats_ds.count()) - - val ds:Dataset[Datasource] = SparkApplyHostedByMapToDatasource.applyHBtoDats(hbm_ds, dats_ds) - - assertEquals(10, ds.count) - - val temp: Dataset[(Datasource, Datasource)] = dats_ds.joinWith(ds, dats_ds.col("id").equalTo(ds.col("id")), "left") - assertEquals(10, temp.count()) - temp.foreach(t2 => { - val pb : Datasource = t2._1 - val pa : Datasource = t2._2 - assertTrue(t2._1.getId.equals(t2._2.getId)) - if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")) { - assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) - assertTrue(pa.getOpenairecompatibility().getClassname.equals("collected from a compatible aggregator")) - - assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) - - - } else { - assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) - assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassname)) - - } - }) - - spark.close() - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java new file mode 100644 index 000000000..85cb551bc --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java @@ -0,0 +1,65 @@ + +package eu.dnetlib.dhp.oa.graph.raw; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +public class CopyHdfsOafSparkApplicationTest { + 
+ @Test + void testIsOafType() throws IOException { + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/publication_1.json")), + "publication")); + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json")), + "dataset")); + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/relation_1.json")), + "relation")); + + assertFalse( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/publication_1.json")), + "dataset")); + assertFalse( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json")), + "publication")); + + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass() + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json")), + "publication")); + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 67490a470..53b3f8432 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -51,8 +51,8 @@ class GenerateEntitiesApplicationTest { Result software = getResult("odf_software.xml", Software.class); Result orp = getResult("oaf_orp.xml", OtherResearchProduct.class); - verifyMerge(publication, dataset, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); - 
verifyMerge(dataset, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); + verifyMerge(publication, dataset, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); + verifyMerge(dataset, publication, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); verifyMerge(publication, software, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); verifyMerge(software, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 27e33bf27..bdb73abf5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -10,6 +10,7 @@ import java.io.IOException; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -355,6 +356,25 @@ class MappersTest { assertTrue(r2.getValidated()); assertEquals("2020-01-01", r1.getValidationDate()); assertEquals("2020-01-01", r2.getValidationDate()); + + assertNotNull(d.getTitle()); + assertEquals(2, d.getTitle().size()); + verifyTitle(d, "main title", "Temperature and ADCP data collected on Lake Geneva between 2015 and 2017"); + verifyTitle(d, "Subtitle", "survey"); + } + + private void verifyTitle(Dataset d, String titleType, String title) { + Optional + .of( + d + .getTitle() + .stream() + .filter(t -> titleType.equals(t.getQualifier().getClassid())) + .collect(Collectors.toList())) + .ifPresent(t -> { + assertEquals(1, t.size()); + assertEquals(title, t.get(0).getValue()); + }); } @Test @@ -708,7 +728,7 @@ class MappersTest { assertEquals(1, p.getTitle().size()); 
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); - final Publication p_cleaned = cleanup(fixVocabularyNames(p)); + final Publication p_cleaned = cleanup(fixVocabularyNames(p), vocs); assertNotNull(p_cleaned.getTitle()); assertFalse(p_cleaned.getTitle().isEmpty()); @@ -794,6 +814,27 @@ class MappersTest { } } + @Test + void testOpenAPC() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_openapc.xml"))); + final List list = new OafToOafMapper(vocs, true, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertTrue(p.getInstance().size() > 0); + + assertEquals("https://doi.org/10.1155/2015/439379", p.getInstance().get(0).getUrl().get(0)); + + assertTrue(p.getProcessingchargeamount() != null); + assertTrue(p.getProcessingchargecurrency() != null); + + assertEquals("1721.47", p.getProcessingchargeamount().getValue()); + assertEquals("EUR", p.getProcessingchargecurrency().getValue()); + } + private void assertValidId(final String id) { // System.out.println(id); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala deleted file mode 100644 index 46bf48974..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ /dev/null @@ -1,187 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.resolution - - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.schema.common.EntityType -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.dhp.schema.oaf.{Result, StructuredProperty} -import org.apache.commons.io.FileUtils -import 
org.apache.spark.SparkConf -import org.apache.spark.sql._ -import org.junit.jupiter.api.Assertions._ -import org.junit.jupiter.api.TestInstance.Lifecycle -import org.junit.jupiter.api.{AfterAll, BeforeAll, Test, TestInstance} - -import java.nio.file.{Files, Path} -import scala.collection.JavaConverters._ -import scala.io.Source - -@TestInstance(Lifecycle.PER_CLASS) -class ResolveEntitiesTest extends Serializable { - - var workingDir:Path = null - - val FAKE_TITLE = "FAKETITLE" - val FAKE_SUBJECT = "FAKESUBJECT" - - var sparkSession:Option[SparkSession] = None - - - @BeforeAll - def setUp() :Unit = { - workingDir = Files.createTempDirectory(getClass.getSimpleName) - - val conf = new SparkConf() - sparkSession = Some(SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master("local[*]").getOrCreate()) - populateDatasets(sparkSession.get) - generateUpdates(sparkSession.get) - - } - - - @AfterAll - def tearDown():Unit = { - FileUtils.deleteDirectory(workingDir.toFile) - sparkSession.get.stop() - - - } - - - def generateUpdates(spark:SparkSession):Unit = { - val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString - - - val pids:List[String] = template.lines.map{id => - val r = new Result - r.setId(id.toLowerCase.trim) - r.setSubject(List(OafMapperUtils.structuredProperty(FAKE_SUBJECT, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) - r.setTitle(List(OafMapperUtils.structuredProperty(FAKE_TITLE, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) - r - }.map{r => - val mapper = new ObjectMapper() - - mapper.writeValueAsString(r)}.toList - - - val sc =spark.sparkContext - - println(sc.parallelize(pids).count()) - - spark.createDataset(sc.parallelize(pids))(Encoders.STRING).write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$workingDir/updates") - - - - - - import spark.implicits._ - implicit val resEncoder: Encoder[Result] = 
Encoders.bean(classOf[Result]) - val ds = spark.read.text(s"$workingDir/updates").as[String].map{s => val mapper = new ObjectMapper() - mapper.readValue(s, classOf[Result])}.collect() - - - - - assertEquals(4, ds.length) - ds.foreach{r => assertNotNull(r.getSubject)} - ds.foreach{r => assertEquals(1,r.getSubject.size())} - ds.foreach{r => assertNotNull(r.getTitle)} - ds.foreach{r => assertEquals(1,r.getTitle.size())} - - - - ds.flatMap(r => r.getTitle.asScala.map(t => t.getValue)).foreach(t => assertEquals(FAKE_TITLE,t)) - ds.flatMap(r => r.getSubject.asScala.map(t => t.getValue)).foreach(t => assertEquals(FAKE_SUBJECT,t)) - - println("generated Updates") - } - - - def populateDatasets(spark:SparkSession):Unit = { - import spark.implicits._ - val entities =SparkResolveEntities.entities - - entities.foreach{ - e => - val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString - spark.createDataset(spark.sparkContext.parallelize(template.lines.toList)).as[String].write.option("compression", "gzip").text(s"$workingDir/graph/$e") - println(s"Created Dataset $e") - } - SparkResolveRelation.extractPidResolvedTableFromJsonRDD(spark, s"$workingDir/graph", s"$workingDir/work") - - } - - - @Test - def testResolution():Unit = { - val spark:SparkSession = sparkSession.get - implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) - - val ds = spark.read.load(s"$workingDir/work/resolvedEntities").as[Result] - - assertEquals(3, ds.count()) - - ds.collect().foreach{ - r => - assertTrue(r.getId.startsWith("50")) - } - } - - - - - private def structuredPContainsValue(l:java.util.List[StructuredProperty], exptectedValue:String):Boolean = { - l.asScala.exists(p =>p.getValue!= null && p.getValue.equalsIgnoreCase(exptectedValue)) - } - - @Test - def testUpdate():Unit = { - val spark:SparkSession = sparkSession.get - import spark.implicits._ - implicit 
val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - val m = new ObjectMapper() - SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) - SparkResolveEntities.generateResolvedEntities(spark,s"$workingDir/work",s"$workingDir/graph" ) - - - - val pubDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/publication").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) - val t = pubDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() - - - - val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) - val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() - - - val softDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) - val ts = softDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() - - - val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) - val to = orpDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() - - - assertEquals(0, t) - assertEquals(2, td) - assertEquals(1, ts) - assertEquals(0, to) - - } - - - - - - - - - - - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json new file mode 100644 index 000000000..061145d0f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json @@ -0,0 +1 @@ +{"context": [], "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "sysimport:actionset", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.1097/00132586-197308000-00003"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2006-11-06T11:36:37Z"}], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}], "id": "50|doi_________::b0baa0eb88a5788f0b8815560d2a32f2", "subject": [], "lastupdatetimestamp": 1620353302565, "author": [{"fullname": "N. S. AGRUSS", "surname": "AGRUSS", "name": "N. S.", "rank": 1}, {"fullname": "E. Y. ROSIN", "surname": "ROSIN", "name": "E. Y.", "rank": 2}, {"fullname": "R. J. ADOLPH", "surname": "ADOLPH", "name": "R. J.", "rank": 3}, {"fullname": "N. O. FOWLER", "surname": "FOWLER", "name": "N. 
O.", "rank": 4}], "instance": [{"hostedby": {"key": "10|issn___print::b8cee613d4f898f8c03956d57ea69be2", "value": "Survey of Anesthesiology"}, "url": ["https://doi.org/10.1097/00132586-197308000-00003"], "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.1097/00132586-197308000-00003"}], "dateofacceptance": {"value": "2006-11-06T11:36:37Z"}, "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2021-05-07T02:08:22Z", "fulltext": [], "description": [], "format": [], "journal": {"issnPrinted": "0039-6206", "vol": "17", "sp": "304", "name": "Survey of Anesthesiology"}, "measures": [], "coverage": [], "externalReference": [], "publisher": {"value": "Ovid Technologies (Wolters Kluwer Health)"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.1097/00132586-197308000-00003", "50|doiboost____::b0baa0eb88a5788f0b8815560d2a32f2"], "source": [{"value": "Crossref"}], "dateofacceptance": {"value": "2006-11-06T11:36:37Z"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "SIGNIFICANCE OF CHRONIC SINUS BRADYCARDIA IN ELDERLY PEOPLE"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json new file mode 100644 
index 000000000..cf81b0286 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json @@ -0,0 +1 @@ +{"context": [], "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "sysimport:actionset", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.2143/tvg.62.1.5002364"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2007-08-20T08:35:04Z"}, {"qualifier": {"classid": "published-online", "classname": "published-online", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2006-01-01"}], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}], "id": "50|doi_________::4972b0ca81b96b225aed8038bb965656", "subject": [{"qualifier": {"classid": "keywords", "classname": "keywords", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "General Medicine"}], "lastupdatetimestamp": 1620381522840, "author": [{"fullname": "null VERHAMME P", "surname": "VERHAMME P", "rank": 1}], "instance": [{"hostedby": {"key": "10|issn__online::7ec728ad1ac65c60cd563a5137111125", "value": "Tijdschrift voor Geneeskunde"}, "url": ["https://doi.org/10.2143/tvg.62.1.5002364"], "pid": [{"qualifier": {"classid": "doi", 
"classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.2143/tvg.62.1.5002364"}], "dateofacceptance": {"value": "2006-01-01"}, "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2021-05-07T09:58:42Z", "fulltext": [], "description": [], "format": [], "journal": {"vol": "62", "sp": "55", "issnOnline": "0371-683X", "ep": "61", "name": "Tijdschrift voor Geneeskunde"}, "measures": [], "coverage": [], "externalReference": [], "publisher": {"value": "Peeters Publishers"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.2143/tvg.62.1.5002364", "50|doiboost____::4972b0ca81b96b225aed8038bb965656"], "source": [{"value": "Crossref"}], "dateofacceptance": {"value": "2006-01-01"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Antitrombotica: nieuwe moleculen"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json index 97764de00..06eb9bae0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json @@ -1,10 +1,10 @@ 
-{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e"} -{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} -{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} \ No newline 
at end of file +{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} 
+{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} +{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","dataInfo": {"provenanceaction": {"classid": "iis", "classname": "erroneous label to be cleaned","schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}}} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index b3e302474..78fdc4c9d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -403,6 +403,178 @@ "http://juuli.fi/Record/0275158616", "http://dx.doi.org/10.1007/s109090161569x" ] + }, + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1002/s21010127267xy" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + 
"distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "Model", + "classname": "Model", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://dx.doi.org/10.1002/s21010127267xy" + ] + }, + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1002/s21010127267xy" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "xyz", + 
"classname": "xyz", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://dx.doi.org/10.1002/t32121238378t" + ] } ], "journal": { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json new file mode 100644 index 000000000..ffcb187c1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json @@ -0,0 +1,958 @@ +{ + "author": [ + { + "affiliation": [ + ], + "fullname": "Brien, Tom", + "name": "Tom", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "https://orcid.org/0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": 
"sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "orcid", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + } + ], + "rank": 1, + "surname": "Brien" + }, + { + "affiliation": [ + ], + "fullname": "Ade, Peter", + "name": "Peter", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "xyz", + "classname": "XYZ", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "qwerty" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID", + "classname": "ORCID", + "schemeid": "", + "schemename": "" + }, + "value": "asdasd" + } + ], + "rank": 2, + "surname": "Ade" + }, + { + "affiliation": [ + ], + "fullname": "Barry, Peter S.", + "name": "Peter S.", + "pid": null, + "rank": 3, + "surname": "Barry" + }, + { + "affiliation": [ + ], + "fullname": "Dunscombe, Chris J.", + "name": "Chris J.", + "pid": [ + ], + "rank": 4, + "surname": "Dunscombe" + }, + { + "affiliation": [ + ], + "fullname": "Leadley, David R.", + "name": "David R.", + "pid": [ + ], + "rank": 5, + "surname": "Leadley" + }, + { + "affiliation": [ + ], + "fullname": "Morozov, Dmitry V.", + "name": "Dmitry 
V.", + "pid": [ + ], + "rank": 6, + "surname": "Morozov" + }, + { + "affiliation": [ + ], + "fullname": "Myronov, Maksym", + "name": "Maksym", + "pid": [ + ], + "rank": 7, + "surname": "Myronov" + }, + { + "affiliation": [ + ], + "fullname": "Parker, Evan", + "name": "Evan", + "pid": [ + ], + "rank": 8, + "surname": "Parker" + }, + { + "affiliation": [ + ], + "fullname": "Prest, Martin J.", + "name": "Martin J.", + "pid": [ + ], + "rank": 9, + "surname": "Prest" + }, + { + "affiliation": [ + ], + "fullname": "Prunnila, Mika", + "name": "Mika", + "pid": [ + ], + "rank": 10, + "surname": "Prunnila" + }, + { + "affiliation": [ + ], + "fullname": "Sudiwala, Rashmi V.", + "name": "Rashmi V.", + "pid": [ + ], + "rank": 11, + "surname": "Sudiwala" + }, + { + "affiliation": [ + ], + "fullname": "Whall, Terry E.", + "name": "Terry E.", + "pid": [ + ], + "rank": 12, + "surname": "Whall" + }, + { + "affiliation": [ + ], + "fullname": "Mauskopf", + "name": "", + "pid": [ + ], + "rank": 13, + "surname": "" + }, + { + "affiliation": [ + ], + "fullname": " P. D. 
", + "name": "", + "pid": [ + ], + "rank": 14, + "surname": "" + } + ], + "bestaccessright": null, + "publisher": { + "value": null + }, + "collectedfrom": [ + { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + } + ], + "context": [ + ], + "contributor": [ + ], + "country": [ + { + "classid": "DE", + "classname": "DE", + "schemeid": "dnet:countries", + "schemename": "dnet:countries" + } + ], + "coverage": [ + ], + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": true, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "7 oct 1970" + }, + "dateofcollection": "", + "dateoftransformation": "2020-04-22T12:34:08.009Z", + "description": [ + ], + "externalReference": [ + ], + "extraInfo": [ + ], + "format": [ + ], + "fulltext": [ + ], + "id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375", + "instance": [ + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + 
"schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "Comment/debate", + "classname": "Comment/debate", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://juuli.fi/Record/0275158616", + "http://dx.doi.org/10.1007/s109090161569x" + ] + } + ], + "journal": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "edition": "", + "ep": " 7", + "iss": "9 March", + "issnLinking": "", + "issnOnline": "", + "issnPrinted": "0022-2291", + "name": "Journal of Low Temperature Physics - Early Acces", + "sp": "1 ", + "vol": "" + }, + "language": { + "classid": "UNKNOWN", + 
"classname": "UNKNOWN", + "schemeid": "dnet:languages", + "schemename": "dnet:languages" + }, + "lastupdatetimestamp": 1591283286319, + "oaiprovenance": { + "originDescription": { + "altered": true, + "baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif", + "datestamp": "2019-07-30", + "harvestDate": "2020-04-22T11:04:38.685Z", + "identifier": "oai:virta-jtp.csc.fi:Publications/0275158616", + "metadataNamespace": "" + } + }, + "originalId": [ + "CSC_________::2250a70c903c6ac6e4c01438259e9375" + ], + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + 
"schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "" + } + ], + "relevantdate": [ + ], + "resourcetype": { + "classid": "0001", + "classname": "0001", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "source": [ + ], + "subject": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "ta213" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "infrared detectors" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "lens antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + 
"inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "slot antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "strained silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "cold electron bolometers" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + 
"provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "doped silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "measure noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "noise equivalent power" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical characterisation" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": 
"sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical response" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "photon noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon absorbers" + } + ], + "title": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "Optical response of strained- and unstrained-silicon cold-electron bolometers test" + }, + { + "dataInfo": { + "deletedbyinference": false, + 
"inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "test test 123 test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "omic" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + 
"qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "-" + } + ] +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json new file mode 100644 index 000000000..ac6884741 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json @@ -0,0 +1,957 @@ +{ + "author": [ + { + "affiliation": [ + ], + "fullname": "Brien, Tom", + "name": "Tom", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "https://orcid.org/0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": 
"sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "orcid", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + } + ], + "rank": 1, + "surname": "Brien" + }, + { + "affiliation": [ + ], + "fullname": "Ade, Peter", + "name": "Peter", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "xyz", + "classname": "XYZ", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "qwerty" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID", + "classname": "ORCID", + "schemeid": "", + "schemename": "" + }, + "value": "asdasd" + } + ], + "rank": 2, + "surname": "Ade" + }, + { + "affiliation": [ + ], + "fullname": "Barry, Peter S.", + "name": "Peter S.", + "pid": null, + "rank": 3, + "surname": "Barry" + }, + { + "affiliation": [ + ], + "fullname": "Dunscombe, Chris J.", + "name": "Chris J.", + "pid": [ + ], + "rank": 4, + "surname": "Dunscombe" + }, + { + "affiliation": [ + ], + "fullname": "Leadley, David R.", + "name": "David R.", + "pid": [ + ], + "rank": 5, + "surname": "Leadley" + }, + { + "affiliation": [ + ], + 
"fullname": "Morozov, Dmitry V.", + "name": "Dmitry V.", + "pid": [ + ], + "rank": 6, + "surname": "Morozov" + }, + { + "affiliation": [ + ], + "fullname": "Myronov, Maksym", + "name": "Maksym", + "pid": [ + ], + "rank": 7, + "surname": "Myronov" + }, + { + "affiliation": [ + ], + "fullname": "Parker, Evan", + "name": "Evan", + "pid": [ + ], + "rank": 8, + "surname": "Parker" + }, + { + "affiliation": [ + ], + "fullname": "Prest, Martin J.", + "name": "Martin J.", + "pid": [ + ], + "rank": 9, + "surname": "Prest" + }, + { + "affiliation": [ + ], + "fullname": "Prunnila, Mika", + "name": "Mika", + "pid": [ + ], + "rank": 10, + "surname": "Prunnila" + }, + { + "affiliation": [ + ], + "fullname": "Sudiwala, Rashmi V.", + "name": "Rashmi V.", + "pid": [ + ], + "rank": 11, + "surname": "Sudiwala" + }, + { + "affiliation": [ + ], + "fullname": "Whall, Terry E.", + "name": "Terry E.", + "pid": [ + ], + "rank": 12, + "surname": "Whall" + }, + { + "affiliation": [ + ], + "fullname": "Mauskopf", + "name": "", + "pid": [ + ], + "rank": 13, + "surname": "" + }, + { + "affiliation": [ + ], + "fullname": " P. D. 
", + "name": "", + "pid": [ + ], + "rank": 14, + "surname": "" + } + ], + "bestaccessright": null, + "publisher": { + "value": null + }, + "collectedfrom": [ + { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + } + ], + "context": [ + ], + "contributor": [ + ], + "country": [ + { + "classid": "DE", + "classname": "DE", + "schemeid": "dnet:countries", + "schemename": "dnet:countries" + } + ], + "coverage": [ + ], + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "7 oct 1970" + }, + "dateofcollection": "", + "dateoftransformation": "2020-04-22T12:34:08.009Z", + "description": [ + ], + "externalReference": [ + ], + "extraInfo": [ + ], + "format": [ + ], + "fulltext": [ + ], + "id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375", + "instance": [ + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": 
"dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "Comment/debate", + "classname": "Comment/debate", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://juuli.fi/Record/0275158616", + "http://dx.doi.org/10.1007/s109090161569x" + ] + } + ], + "journal": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "edition": "", + "ep": " 7", + "iss": "9 March", + "issnLinking": "", + "issnOnline": "", + "issnPrinted": "0022-2291", + "name": "Journal of Low Temperature Physics - Early Acces", + "sp": "1 ", + "vol": "" + }, + "language": { + "classid": "UNKNOWN", + 
"classname": "UNKNOWN", + "schemeid": "dnet:languages", + "schemename": "dnet:languages" + }, + "lastupdatetimestamp": 1591283286319, + "oaiprovenance": { + "originDescription": { + "altered": true, + "baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif", + "datestamp": "2019-07-30", + "harvestDate": "2020-04-22T11:04:38.685Z", + "identifier": "oai:virta-jtp.csc.fi:Publications/0275158616", + "metadataNamespace": "" + } + }, + "originalId": [ + "CSC_________::2250a70c903c6ac6e4c01438259e9375" + ], + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + 
"schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "" + } + ], + "relevantdate": [ + ], + "resourcetype": { + "classid": "0001", + "classname": "0001", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "source": [ + ], + "subject": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "ta213" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "infrared detectors" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "lens antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + 
"inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "slot antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "strained silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "cold electron bolometers" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + 
"provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "doped silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "measure noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "noise equivalent power" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical characterisation" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": 
"sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical response" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "photon noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon absorbers" + } + ], + "title": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "Optical response of strained- and unstrained-silicon cold-electron bolometers test" + }, + { + "dataInfo": { + "deletedbyinference": false, + 
"inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "test test 123 test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "omic" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + 
"qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "-" + } + ] +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt index 79dc7cd2d..09bd58aeb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt @@ -1241,4 +1241,5 @@ dnet:relation_relClass @=@ Reviews @=@ reviews dnet:relation_relClass @=@ IsSupplementTo @=@ isSupplementTo dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo -dnet:relation_subRelType @=@ relationship @=@ publicationDataset \ No newline at end of file +dnet:relation_subRelType @=@ relationship @=@ publicationDataset +dnet:provenanceActions @=@ iis @=@ erroneous label to be cleaned \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel index 6b146405a..979cbf168 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel @@ -1,2 +1,2 @@ 
-{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected 
Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. 
In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. 
This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by 
OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} -{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"Eng
lish"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. 
Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. 
We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by 
OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subject
s":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. 
Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. 
We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by 
OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"Eng
lish"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. 
Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. 
We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by 
OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance index 043be7b1a..468811513 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance @@ -1 +1 @@ 
-{"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":[{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected 
Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and 
synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting 
services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. 
Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. 
This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: 
e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"dh-ch"}],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One 
Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} \ No newline at end of file 
+{"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected 
Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and 
synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting 
services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. 
Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. 
This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: 
e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"dh-ch"}],"externalReference":[],"instance":[{"measures":[{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}],"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One 
Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json new file 
mode 100644 index 000000000..e30be47e9 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary 
sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classif
ication_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_t
ypologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - 
geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages 
D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json new file mode 100644 index 000000000..29ce76df3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, 
B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary 
sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject
_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classifi
cation_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - 
geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages 
D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json new file mode 100644 index 000000000..c22dc94e3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, 
B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary 
sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classif
ication_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_t
ypologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - 
geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages 
D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml index e69de29bb..4a326a21e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml @@ -0,0 +1,45 @@ + + + + openapc_____::000023f9cb6e3a247c764daec4273cbc + 10.1155/2015/439379 + 
2022-02-01T15:26:33.817Z + openapc_____ + 2022-02-02T15:45:32.502Z + + + https://doi.org/10.1155/2015/439379 + 10.1155/2015/439379 + PMC4354964 + 25811027.0 + UCL + UCL + 1721.47 + BioMed Research International + http://creativecommons.org/licenses/by/3.0/ + 2015 + 0004 + OPEN + open access + + + + + + false + false + 0.9 + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml index 31de2e45b..4f41ee6ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml @@ -54,7 +54,8 @@ - Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 + Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 + survey Zenodo 2019 diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json new file mode 100644 index 000000000..bd3710234 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json @@ -0,0 +1 @@ +{"author":[{"fullname":"Makkonen, Lasse","name":"Lasse","pid":[],"rank":1,"surname":"Makkonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Watson, Rick"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"dateofcollection":"2021-10-04T12:42:57.502Z","dateoftransformation":"2021-10-04T15:32:51.877Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for 
newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433424020,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-10-26T12:07:44Z","harvestDate":"2021-10-04T12:42:57.502Z","identifier":"oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Irish Wind Energy Association"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Makkonen , L 1997 , Calculation of ice accretion on wind turbine blades . in R Watson (ed.) 
, EWEC '97: European Wind Energy Conference : Proceedings of the international conference . Irish Wind Energy Association , Slane , European Wind Energy Conference EWEC '97 , Dublin , Ireland , 6/10/97 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Calculation of ice accretion on wind turbine blades"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json new file mode 100644 index 000000000..364796c24 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json @@ -0,0 +1 @@ +{"id":"50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353", "resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"}, "unknownProperty": "asdasd"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json new file mode 100644 index 000000000..31755c53d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json @@ -0,0 +1,6 @@ 
+{"source":"1a","target":"10a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"10a","target":"1a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"2a","target":"20a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} 
+{"source":"20a","target":"2a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"15a","target":"25a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"25a","target":"15a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala new file mode 100644 index 000000000..7e41e993f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -0,0 +1,198 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +class TestApply extends java.io.Serializable { + + @Test + def testApplyOnResult(): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("publication.json").getPath + val hbm = getClass.getResource("preparedInfo.json").getPath + + val mapper: ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Publication] = Encoders.bean(classOf[Publication]) + + val pub_ds: Dataset[Publication] = + spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[Publication])) + val hbm_ds: Dataset[EntityInfo] = + spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + assertEquals(13, pub_ds.count()) + + val ds: Dataset[Publication] = SparkApplyHostedByMapToResult.applyHBtoPubs(hbm_ds, pub_ds) + + assertEquals(13, ds.count) + + val temp: Dataset[(Publication, 
Publication)] = + pub_ds.joinWith(ds, pub_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(13, temp.count()) + temp.foreach(t2 => { + val pb: Publication = t2._1 + val pa: Publication = t2._2 + assertEquals(1, pa.getInstance().size()) + assertEquals(1, pb.getInstance().size()) + assertTrue(t2._1.getId.equals(t2._2.getId)) + if (pb.getId.equals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9")) { + assertTrue( + pa.getInstance() + .get(0) + .getHostedby + .getKey + .equals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735") + ) + assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) + assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) + assertTrue( + pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold) + ) + assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) + assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) + + assertTrue( + pb.getInstance() + .get(0) + .getHostedby + .getKey + .equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c") + ) + assertTrue( + pb.getInstance() + .get(0) + .getHostedby + .getValue + .equals("Revistas de investigación Universidad Nacional Mayor de San Marcos") + ) + assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("not available")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("UNKNOWN")) + assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) + assertTrue(pb.getBestaccessright.getClassid.equals("UNKNOWN")) + assertTrue(pb.getBestaccessright.getClassname.equals("not available")) + + } else { + assertTrue( + pa.getInstance() + .get(0) + .getHostedby + .getKey + .equals(pb.getInstance().get(0).getHostedby.getKey) + ) + assertTrue( + pa.getInstance() + .get(0) + .getHostedby + .getValue + .equals(pb.getInstance().get(0).getHostedby.getValue) + ) + 
assertTrue( + pa.getInstance() + .get(0) + .getAccessright + .getClassid + .equals(pb.getInstance().get(0).getAccessright.getClassid) + ) + assertTrue( + pa.getInstance() + .get(0) + .getAccessright + .getClassname + .equals(pb.getInstance().get(0).getAccessright.getClassname) + ) + assertTrue( + pa.getInstance().get(0).getAccessright.getOpenAccessRoute == pb + .getInstance() + .get(0) + .getAccessright + .getOpenAccessRoute + ) + + } + }) + + spark.close() + } + + @Test + def testApplyOnDatasource(): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val dats = getClass.getResource("datasource.json").getPath + val hbm = getClass.getResource("preparedInfo2.json").getPath + + val mapper: ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + + val dats_ds: Dataset[Datasource] = + spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) + val hbm_ds: Dataset[EntityInfo] = Aggregators.datasourceToSingleId( + spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + ) + + assertEquals(10, dats_ds.count()) + + val ds: Dataset[Datasource] = SparkApplyHostedByMapToDatasource.applyHBtoDats(hbm_ds, dats_ds) + + assertEquals(10, ds.count) + + val temp: Dataset[(Datasource, Datasource)] = + dats_ds.joinWith(ds, dats_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(10, temp.count()) + temp.foreach(t2 => { + val pb: Datasource = t2._1 + val pa: Datasource = t2._2 + assertTrue(t2._1.getId.equals(t2._2.getId)) + if (pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")) { + assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) + assertTrue( + 
pa.getOpenairecompatibility() + .getClassname + .equals("collected from a compatible aggregator") + ) + + assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) + + } else { + assertTrue( + pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid) + ) + assertTrue( + pa.getOpenairecompatibility() + .getClassname + .equals(pb.getOpenairecompatibility.getClassname) + ) + + } + }) + + spark.close() + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala similarity index 66% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index a3a753a8a..5fc29e3b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -3,19 +3,15 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo -import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} -import javax.management.openmbean.OpenMBeanAttributeInfo import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} -import org.json4s import org.json4s.DefaultFormats -import eu.dnetlib.dhp.schema.common.ModelConstants import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test -class TestPrepare extends java.io.Serializable{ +class TestPrepare extends 
java.io.Serializable { - def getString(input:HostedByItemType):String = { + def getString(input: HostedByItemType): String = { import org.json4s.jackson.Serialization.write implicit val formats = DefaultFormats @@ -23,9 +19,8 @@ class TestPrepare extends java.io.Serializable{ write(input) } - @Test - def testHostedByMaptoEntityInfo() : Unit = { + def testHostedByMaptoEntityInfo(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -37,23 +32,23 @@ class TestPrepare extends java.io.Serializable{ .getOrCreate() val hbm = getClass.getResource("hostedbymap.json").getPath - import spark.implicits._ - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - val ds :Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hbm)).map(toEntityInfo) + val ds: Dataset[EntityInfo] = + spark.createDataset(spark.sparkContext.textFile(hbm)).map(toEntityInfo) ds.foreach(e => println(mapper.writeValueAsString(e))) - assertEquals(20, ds.count) + assertEquals(20, ds.count) spark.close() } @Test - def testPublicationtoEntityInfo() : Unit = { + def testPublicationtoEntityInfo(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -65,24 +60,30 @@ class TestPrepare extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("publication.json").getPath - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - val ds :Dataset[EntityInfo] = prepareResultInfo(spark, path) + val ds: Dataset[EntityInfo] = prepareResultInfo(spark, path) ds.foreach(e => println(mapper.writeValueAsString(e))) - assertEquals(2, ds.count) + assertEquals(2, ds.count) - 
assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("1728-5852")).first().getId) - assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("0001-396X")).first().getId) + assertEquals( + "50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", + ds.filter(ei => ei.getJournalId.equals("1728-5852")).first().getId + ) + assertEquals( + "50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", + ds.filter(ei => ei.getJournalId.equals("0001-396X")).first().getId + ) spark.close() } @Test - def testJoinResHBM (): Unit = { + def testJoinResHBM(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -95,18 +96,20 @@ class TestPrepare extends java.io.Serializable{ val pub = getClass.getResource("iteminfofrompublication").getPath val hbm = getClass.getResource("iteminfofromhostedbymap.json").getPath - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) - val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + val pub_ds: Dataset[EntityInfo] = + spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds: Dataset[EntityInfo] = + spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) - assertEquals(1, ds.count) + assertEquals(1, ds.count) - val ei:EntityInfo = ds.first() + val ei: EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) @@ -118,7 +121,7 @@ class TestPrepare extends java.io.Serializable{ } @Test 
- def testJoinResHBM2 (): Unit = { + def testJoinResHBM2(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -131,18 +134,20 @@ class TestPrepare extends java.io.Serializable{ val pub = getClass.getResource("iteminfofrompublication2").getPath val hbm = getClass.getResource("iteminfofromhostedbymap2.json").getPath - val mapper:ObjectMapper = new ObjectMapper() + val mapper: ObjectMapper = new ObjectMapper() implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) - val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + val pub_ds: Dataset[EntityInfo] = + spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds: Dataset[EntityInfo] = + spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) - assertEquals(1, ds.count) + assertEquals(1, ds.count) - val ei:EntityInfo = ds.first() + val ei: EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) @@ -154,6 +159,4 @@ class TestPrepare extends java.io.Serializable{ spark.close() } - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala similarity index 56% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 5b00e9b6f..12879c466 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -4,25 +4,23 @@ import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats -import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} -import org.junit.jupiter.api.Test -import org.junit.jupiter.api.Assertions._ import org.json4s.jackson.Serialization.write +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Test -class TestPreprocess extends java.io.Serializable{ +class TestPreprocess extends java.io.Serializable { implicit val mapEncoderDats: Encoder[Datasource] = Encoders.kryo[Datasource] implicit val schema = Encoders.product[HostedByInfo] - - def toHBIString (hbi:HostedByItemType): String = { + def toHBIString(hbi: HostedByItemType): String = { implicit val formats = DefaultFormats write(hbi) } @Test - def readDatasource():Unit = { + def readDatasource(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -34,29 +32,40 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("datasource.json").getPath - val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.oaHostedByDataset(spark, path) + val ds: Dataset[HostedByItemType] = SparkProduceHostedByMap.oaHostedByDataset(spark, path) - assertEquals(9, ds.count) + assertEquals(9, ds.count) assertEquals(8, ds.filter(hbi => !hbi.issn.equals("")).count) assertEquals(5, ds.filter(hbi => !hbi.eissn.equals("")).count) assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) - assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertEquals( + 0, + ds.filter(hbi => hbi.issn.equals("") && 
hbi.eissn.equals("") && hbi.lissn.equals("")).count + ) assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365")).count == 1) assertTrue(ds.filter(hbi => hbi.eissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1) - assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.id.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + assertTrue( + ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1 + ) + assertTrue( + ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1 + ) + assertTrue( + ds.filter(hbi => + hbi.issn.equals("0212-8365") && hbi.id + .equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c") + ).count == 1 + ) ds.foreach(hbi => assertTrue(hbi.id.startsWith("10|"))) ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } - @Test - def readGold():Unit = { + def readGold(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -68,8 +77,7 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("unibi_transformed.json").getPath - - val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.goldHostedByDataset(spark, path) + val ds: Dataset[HostedByItemType] = SparkProduceHostedByMap.goldHostedByDataset(spark, path) assertEquals(29, ds.count) @@ -77,9 +85,17 @@ class TestPreprocess extends java.io.Serializable{ assertEquals(0, ds.filter(hbi => !hbi.eissn.equals("")).count) assertEquals(29, ds.filter(hbi => !hbi.lissn.equals("")).count) - assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertEquals( + 0, + ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && 
hbi.lissn.equals("")).count + ) - assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().officialname.equals("European journal of sustainable development.")) + assertTrue( + ds.filter(hbi => hbi.issn.equals("2239-6101")) + .first() + .officialname + .equals("European journal of sustainable development.") + ) assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().lissn.equals("2239-5938")) assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).count == 1) ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.UNIBI))) @@ -89,7 +105,7 @@ class TestPreprocess extends java.io.Serializable{ } @Test - def readDoaj():Unit = { + def readDoaj(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -101,7 +117,7 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("doaj_transformed.json").getPath - val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.doajHostedByDataset(spark, path) + val ds: Dataset[HostedByItemType] = SparkProduceHostedByMap.doajHostedByDataset(spark, path) assertEquals(25, ds.count) @@ -109,9 +125,17 @@ class TestPreprocess extends java.io.Serializable{ assertEquals(21, ds.filter(hbi => !hbi.eissn.equals("")).count) assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) - assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertEquals( + 0, + ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count + ) - assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().officialname.equals("Journal of Space Technology")) + assertTrue( + ds.filter(hbi => hbi.issn.equals("2077-3099")) + .first() + .officialname + .equals("Journal of Space Technology") + ) assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().eissn.equals("2411-5029")) assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).count == 1) 
assertTrue(ds.filter(hbi => hbi.eissn.equals("2077-2955")).first().issn.equals("")) @@ -122,7 +146,7 @@ class TestPreprocess extends java.io.Serializable{ } @Test - def testAggregator() : Unit = { + def testAggregator(): Unit = { val conf = new SparkConf() conf.setMaster("local[*]") @@ -134,22 +158,40 @@ class TestPreprocess extends java.io.Serializable{ .config(conf) .getOrCreate() - - val tmp = SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) - .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) - .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) - .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + val tmp = SparkProduceHostedByMap + .oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union( + SparkProduceHostedByMap + .goldHostedByDataset(spark, getClass.getResource("unibi_transformed.json").getPath) + ) + .union( + SparkProduceHostedByMap + .doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath) + ) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))( + Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + ) assertEquals(106, tmp.count) assertEquals(82, tmp.map(i => i._1)(Encoders.STRING).distinct().count) + val ds: Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType( + SparkProduceHostedByMap + .oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union( + SparkProduceHostedByMap + .goldHostedByDataset(spark, getClass.getResource("unibi_transformed.json").getPath) + ) + .union( + SparkProduceHostedByMap + .doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath) + ) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))( + Encoders.tuple(Encoders.STRING, 
Encoders.product[HostedByItemType]) + ) + ) - val ds :Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType(SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) - .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) - .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) - .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]))) - - assertEquals(82, ds.count) + assertEquals(82, ds.count) assertEquals(13, ds.filter(i => i._2.id.startsWith("10|")).count) @@ -157,14 +199,13 @@ class TestPreprocess extends java.io.Serializable{ assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.openAccess) assertEquals(1, ds.filter(i => i._1.equals("2077-3757")).count) - val hbmap : Dataset[String] = ds.filter(hbi => hbi._2.id.startsWith("10|")).map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING) + val hbmap: Dataset[String] = ds + .filter(hbi => hbi._2.id.startsWith("10|")) + .map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING) hbmap.foreach(entry => println(entry)) spark.close() } - - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala new file mode 100644 index 000000000..c8e41743f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -0,0 +1,278 @@ +package eu.dnetlib.dhp.oa.graph.resolution + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.schema.common.EntityType +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.{Publication, Result, StructuredProperty} +import 
org.apache.commons.io.FileUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.TestInstance.Lifecycle +import org.junit.jupiter.api.{AfterAll, BeforeAll, Test, TestInstance} + +import java.nio.file.{Files, Path} +import scala.collection.JavaConverters._ +import scala.io.Source + +@TestInstance(Lifecycle.PER_CLASS) +class ResolveEntitiesTest extends Serializable { + + var workingDir: Path = null + + val FAKE_TITLE = "FAKETITLE" + val FAKE_SUBJECT = "FAKESUBJECT" + + var sparkSession: Option[SparkSession] = None + + @BeforeAll + def setUp(): Unit = { + workingDir = Files.createTempDirectory(getClass.getSimpleName) + + val conf = new SparkConf() + sparkSession = Some( + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master("local[*]") + .getOrCreate() + ) + populateDatasets(sparkSession.get) + generateUpdates(sparkSession.get) + + } + + @AfterAll + def tearDown(): Unit = { + FileUtils.deleteDirectory(workingDir.toFile) + sparkSession.get.stop() + + } + + def generateUpdates(spark: SparkSession): Unit = { + val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString + + val pids: List[String] = template.lines + .map { id => + val r = new Result + r.setId(id.toLowerCase.trim) + r.setSubject( + List( + OafMapperUtils.structuredProperty( + FAKE_SUBJECT, + OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), + null + ) + ).asJava + ) + r.setTitle( + List( + OafMapperUtils.structuredProperty( + FAKE_TITLE, + OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), + null + ) + ).asJava + ) + r + } + .map { r => + val mapper = new ObjectMapper() + + mapper.writeValueAsString(r) + } + .toList + + val sc = spark.sparkContext + + println(sc.parallelize(pids).count()) + + spark + .createDataset(sc.parallelize(pids))(Encoders.STRING) + .write + .mode(SaveMode.Overwrite) + .option("compression", 
"gzip") + .text(s"$workingDir/updates") + + import spark.implicits._ + implicit val resEncoder: Encoder[Result] = Encoders.bean(classOf[Result]) + val ds = spark.read + .text(s"$workingDir/updates") + .as[String] + .map { s => + val mapper = new ObjectMapper() + mapper.readValue(s, classOf[Result]) + } + .collect() + + assertEquals(4, ds.length) + ds.foreach { r => assertNotNull(r.getSubject) } + ds.foreach { r => assertEquals(1, r.getSubject.size()) } + ds.foreach { r => assertNotNull(r.getTitle) } + ds.foreach { r => assertEquals(1, r.getTitle.size()) } + + ds.flatMap(r => r.getTitle.asScala.map(t => t.getValue)) + .foreach(t => assertEquals(FAKE_TITLE, t)) + ds.flatMap(r => r.getSubject.asScala.map(t => t.getValue)) + .foreach(t => assertEquals(FAKE_SUBJECT, t)) + + println("generated Updates") + } + + def populateDatasets(spark: SparkSession): Unit = { + import spark.implicits._ + val entities = SparkResolveEntities.entities + + entities.foreach { e => + val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString + spark + .createDataset(spark.sparkContext.parallelize(template.lines.toList)) + .as[String] + .write + .option("compression", "gzip") + .text(s"$workingDir/graph/$e") + println(s"Created Dataset $e") + } + SparkResolveRelation.extractPidResolvedTableFromJsonRDD( + spark, + s"$workingDir/graph", + s"$workingDir/work" + ) + + } + + @Test + def testResolution(): Unit = { + val spark: SparkSession = sparkSession.get + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + SparkResolveEntities.resolveEntities(spark, s"$workingDir/work", s"$workingDir/updates") + + val ds = spark.read.load(s"$workingDir/work/resolvedEntities").as[Result] + + assertEquals(3, ds.count()) + + ds.collect().foreach { r => + assertTrue(r.getId.startsWith("50")) + } + } + + private def structuredPContainsValue( + l: java.util.List[StructuredProperty], + exptectedValue: String + ): Boolean = { + l.asScala.exists(p => p.getValue != 
null && p.getValue.equalsIgnoreCase(exptectedValue)) + } + + @Test + def testUpdate(): Unit = { + val spark: SparkSession = sparkSession.get + import spark.implicits._ + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + val m = new ObjectMapper() + SparkResolveEntities.resolveEntities(spark, s"$workingDir/work", s"$workingDir/updates") + SparkResolveEntities.generateResolvedEntities( + spark, + s"$workingDir/work", + s"$workingDir/graph", + s"$workingDir/target" + ) + + val pubDS: Dataset[Result] = spark.read + .text(s"$workingDir/target/publication") + .as[String] + .map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) + val t = pubDS + .filter(p => p.getTitle != null && p.getSubject != null) + .filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))) + .count() + + var ct = pubDS.count() + var et = pubDS + .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)) + .count() + + assertEquals(ct, et) + + val datDS: Dataset[Result] = spark.read + .text(s"$workingDir/target/dataset") + .as[String] + .map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) + val td = datDS + .filter(p => p.getTitle != null && p.getSubject != null) + .filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))) + .count() + ct = datDS.count() + et = datDS + .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)) + .count() + assertEquals(ct, et) + + val softDS: Dataset[Result] = spark.read + .text(s"$workingDir/target/software") + .as[String] + .map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) + val ts = softDS + .filter(p => p.getTitle != null && p.getSubject != null) + .filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))) + .count() + ct = softDS.count() + et = softDS + .filter(p => p.getTitle != null && 
p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)) + .count() + assertEquals(ct, et) + + val orpDS: Dataset[Result] = spark.read + .text(s"$workingDir/target/otherresearchproduct") + .as[String] + .map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) + val to = orpDS + .filter(p => p.getTitle != null && p.getSubject != null) + .filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))) + .count() + + ct = orpDS.count() + et = orpDS + .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty)) + .count() + assertEquals(ct, et) + + assertEquals(0, t) + assertEquals(2, td) + assertEquals(1, ts) + assertEquals(0, to) + + } + + @Test + def testMerge(): Unit = { + + val r = new Result + r.setSubject( + List( + OafMapperUtils.structuredProperty( + FAKE_SUBJECT, + OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), + null + ) + ).asJava + ) + + val mapper = new ObjectMapper() + + val p = mapper.readValue( + Source + .fromInputStream(this.getClass.getResourceAsStream(s"publication")) + .mkString + .lines + .next(), + classOf[Publication] + ) + + r.mergeFrom(p) + + println(mapper.writeValueAsString(r)) + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala new file mode 100644 index 000000000..80ea9d59c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/RetrieveDataciteDeltaTest.scala @@ -0,0 +1,20 @@ +package eu.dnetlib.dhp.sx.graph + +import org.junit.jupiter.api.Test + +import java.text.SimpleDateFormat + +class RetrieveDataciteDeltaTest { + + @Test + def testParsingDate(): Unit = { + + val inputDate = "2021-12-02T11:17:36+0000" + + val t = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ").parse(inputDate).getTime + + 
println(t) + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala similarity index 53% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala rename to dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index bd7e4fd09..e92f36896 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -18,36 +18,40 @@ import scala.collection.JavaConverters._ import scala.io.Source @ExtendWith(Array(classOf[MockitoExtension])) -class ScholixGraphTest extends AbstractVocabularyTest{ - +class ScholixGraphTest extends AbstractVocabularyTest { val mapper: ObjectMapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false) + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) @BeforeEach - def setUp() :Unit = { + def setUp(): Unit = { super.setUpVocabulary() } - @Test - def testExtractPids():Unit = { + def testExtractPids(): Unit = { - val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/scholix/result.json")).mkString - val res =SparkResolveRelation.extractPidsFromRecord(input) + val input = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/scholix/result.json")) + .mkString + val res = SparkResolveRelation.extractPidsFromRecord(input) assertNotNull(res) - assertTrue(res._2.size == 2) + + assertEquals(1, res._2.size) } @Test - def testOAFToSummary():Unit= { - val inputRelations = 
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")).mkString + def testOAFToSummary(): Unit = { + val inputRelations = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")) + .mkString val items = inputRelations.lines.toList assertNotNull(items) - items.foreach(i =>assertTrue(i.nonEmpty)) - val result = items.map(r => mapper.readValue(r, classOf[Result])).map(i => ScholixUtils.resultToSummary(i)) + items.foreach(i => assertTrue(i.nonEmpty)) + val result = + items.map(r => mapper.readValue(r, classOf[Result])).map(i => ScholixUtils.resultToSummary(i)) assertNotNull(result) @@ -58,37 +62,41 @@ class ScholixGraphTest extends AbstractVocabularyTest{ } - - @Test - def testScholixMergeOnSource():Unit = { - val inputRelations = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix")).mkString - val result:List[(Relation,ScholixSummary)] =inputRelations.lines.sliding(2).map(s => (s.head, s(1))).map(p => (mapper.readValue(p._1, classOf[Relation]),mapper.readValue(p._2, classOf[ScholixSummary]) )).toList + def testScholixMergeOnSource(): Unit = { + val inputRelations = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix") + ) + .mkString + val result: List[(Relation, ScholixSummary)] = inputRelations.lines + .sliding(2) + .map(s => (s.head, s(1))) + .map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary]))) + .toList assertNotNull(result) assertTrue(result.nonEmpty) result.foreach(r => assertEquals(r._1.getSource, r._2.getId)) - val scholix:List[Scholix] = result.map(r => ScholixUtils.scholixFromSource(r._1, r._2)) + val scholix: List[Scholix] = result.map(r => ScholixUtils.scholixFromSource(r._1, r._2)) println(mapper.writeValueAsString(scholix.head)) } - - - @Test def testScholixRelationshipsClean(): Unit = { - val inputRelations = 
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relation_transform.json")).mkString + val inputRelations = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relation_transform.json") + ) + .mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(inputRelations) - val l:List[String] =json.extract[List[String]] + val l: List[String] = json.extract[List[String]] assertNotNull(l) assertTrue(l.nonEmpty) - val relVocbaulary =ScholixUtils.relations - l.foreach(r => assertTrue(relVocbaulary.contains(r.toLowerCase))) + val relVocbaulary = ScholixUtils.relations + l.foreach(r => assertTrue(relVocbaulary.contains(r.toLowerCase))) } - - - } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java new file mode 100644 index 000000000..a38329750 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java @@ -0,0 +1,158 @@ + +package eu.dnetlib.dhp.oa.provision.model; + +import java.util.Set; + +import com.google.common.collect.Sets; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import scala.Serializable; + +public class XmlInstance implements Serializable { + + public static final AccessRight UNKNOWN_ACCESS_RIGHT; + + static { + UNKNOWN_ACCESS_RIGHT = new AccessRight(); + UNKNOWN_ACCESS_RIGHT.setClassid(ModelConstants.UNKNOWN); + UNKNOWN_ACCESS_RIGHT.setClassname(ModelConstants.UNKNOWN); + UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_ACCESS_MODES); + UNKNOWN_ACCESS_RIGHT.setSchemename(ModelConstants.DNET_ACCESS_MODES); + } + + private String url; + + private AccessRight accessright; + + private Set collectedfrom = Sets.newHashSet(); + + private Set hostedby = Sets.newHashSet(); + + 
private Set instancetype = Sets.newHashSet(); + + private Set license = Sets.newHashSet(); + + // other research products specifc + private Set distributionlocation = Sets.newHashSet(); + + private Set pid = Sets.newHashSet(); + + private Set alternateIdentifier = Sets.newHashSet(); + + private Set dateofacceptance = Sets.newHashSet(); + + // ( article | book ) processing charges. Defined here to cope with possible wrongly typed + // results + private String processingchargeamount; + + // currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly + // typed results + private String processingchargecurrency; + + private Set refereed = Sets.newHashSet();; // peer-review status + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public AccessRight getAccessright() { + return accessright; + } + + public void setAccessright(AccessRight accessright) { + this.accessright = accessright; + } + + public Set getCollectedfrom() { + return collectedfrom; + } + + public void setCollectedfrom(Set collectedfrom) { + this.collectedfrom = collectedfrom; + } + + public Set getHostedby() { + return hostedby; + } + + public void setHostedby(Set hostedby) { + this.hostedby = hostedby; + } + + public Set getInstancetype() { + return instancetype; + } + + public void setInstancetype(Set instancetype) { + this.instancetype = instancetype; + } + + public Set getLicense() { + return license; + } + + public void setLicense(Set license) { + this.license = license; + } + + public Set getDistributionlocation() { + return distributionlocation; + } + + public void setDistributionlocation(Set distributionlocation) { + this.distributionlocation = distributionlocation; + } + + public Set getPid() { + return pid; + } + + public void setPid(Set pid) { + this.pid = pid; + } + + public Set getAlternateIdentifier() { + return alternateIdentifier; + } + + public void setAlternateIdentifier(Set alternateIdentifier) { 
+ this.alternateIdentifier = alternateIdentifier; + } + + public Set getDateofacceptance() { + return dateofacceptance; + } + + public void setDateofacceptance(Set dateofacceptance) { + this.dateofacceptance = dateofacceptance; + } + + public String getProcessingchargeamount() { + return processingchargeamount; + } + + public void setProcessingchargeamount(String processingchargeamount) { + this.processingchargeamount = processingchargeamount; + } + + public String getProcessingchargecurrency() { + return processingchargecurrency; + } + + public void setProcessingchargecurrency(String processingchargecurrency) { + this.processingchargecurrency = processingchargecurrency; + } + + public Set getRefereed() { + return refereed; + } + + public void setRefereed(Set refereed) { + this.refereed = refereed; + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 7487f0956..87c0261ac 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -8,11 +8,16 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; +import javax.swing.text.html.Option; + import org.apache.commons.lang3.StringUtils; import org.stringtemplate.v4.ST; +import com.google.common.collect.Lists; + import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.OafEntity; @@ -94,13 +99,15 @@ public class TemplateFactory { } public String getInstance( - final String resultId, final List instancemetadata, final List webresources) { + final List instancemetadata, final String url) { return getTemplate(resources.getInstance()) - 
.add("instanceId", escapeXml(removePrefix(resultId))) .add("metadata", instancemetadata) .add( "webresources", - (webresources != null ? webresources : new ArrayList()) + Optional + .ofNullable(url) + .map(u -> Lists.newArrayList(url)) + .orElse(Lists.newArrayList()) .stream() .filter(StringUtils::isNotBlank) .map(this::getWebResource) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 19300d77d..e51a84e02 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -10,24 +10,20 @@ import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; +import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.http.protocol.HTTP; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -49,25 +45,10 @@ import com.mycila.xmltool.XMLTag; import 
eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.MainEntityType; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.ExternalReference; -import eu.dnetlib.dhp.schema.oaf.ExtraInfo; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Organization; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Project; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.oa.provision.model.XmlInstance; +import eu.dnetlib.dhp.schema.common.*; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { @@ -76,6 +57,8 @@ public class XmlRecordFactory implements Serializable { * */ private static final long serialVersionUID = 2912912999272373172L; + public static final String DOI_ORG_AUTHORITY = "doi.org"; + public static final String HTTPS = "https"; private final Map accumulators; @@ -415,6 +398,16 @@ public class XmlRecordFactory implements Serializable { if (r.getResourcetype() != null) { metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype())); } + if (r.getProcessingchargeamount() != null) { + metadata + .add( + XmlSerializationUtils + .asXmlElement("processingchargeamount", r.getProcessingchargeamount().getValue())); + metadata + .add( + 
XmlSerializationUtils + .asXmlElement("processingchargecurrency", r.getProcessingchargecurrency().getValue())); + } } switch (type) { @@ -1112,37 +1105,64 @@ public class XmlRecordFactory implements Serializable { if (MainEntityType.result.toString().equals(ModelSupport.getMainType(entityType))) { final List instances = ((Result) entity).getInstance(); if (instances != null) { - for (final Instance instance : ((Result) entity).getInstance()) { + groupInstancesByUrl(((Result) entity).getInstance()).forEach(instance -> { final List fields = Lists.newArrayList(); if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) { fields .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } - if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { + if (instance.getCollectedfrom() != null) { fields - .add(XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); + .addAll( + instance + .getCollectedfrom() + .stream() + .filter(cf -> kvNotBlank(cf)) + .map(cf -> XmlSerializationUtils.mapKeyValue("collectedfrom", cf)) + .collect(Collectors.toList())); } - if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { - fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); - } - if (instance.getDateofacceptance() != null - && isNotBlank(instance.getDateofacceptance().getValue())) { + + if (instance.getHostedby() != null) { fields - .add( - XmlSerializationUtils - .asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue())); + .addAll( + instance + .getHostedby() + .stream() + .filter(hb -> kvNotBlank(hb)) + .map(hb -> XmlSerializationUtils.mapKeyValue("hostedby", hb)) + .collect(Collectors.toList())); } - if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) { + if (instance.getDateofacceptance() != null) { fields - .add(XmlSerializationUtils.mapQualifier("instancetype", 
instance.getInstancetype())); + .addAll( + instance + .getDateofacceptance() + .stream() + .filter(d -> isNotBlank(d)) + .map(d -> XmlSerializationUtils.asXmlElement("dateofacceptance", d)) + .collect(Collectors.toList())); } - if (isNotBlank(instance.getDistributionlocation())) { + if (instance.getInstancetype() != null) { fields - .add( - XmlSerializationUtils - .asXmlElement("distributionlocation", instance.getDistributionlocation())); + .addAll( + instance + .getInstancetype() + .stream() + .filter(t -> !t.isBlank()) + .map(t -> XmlSerializationUtils.mapQualifier("instancetype", t)) + .collect(Collectors.toList())); + } + if (instance.getDistributionlocation() != null) { + fields + .addAll( + instance + .getDistributionlocation() + .stream() + .filter(d -> isNotBlank(d)) + .map(d -> XmlSerializationUtils.asXmlElement("distributionlocation", d)) + .collect(Collectors.toList())); } if (instance.getPid() != null) { fields @@ -1165,32 +1185,50 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.toList())); } - if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) { + if (instance.getRefereed() != null) { fields - .add(XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); + .addAll( + instance + .getRefereed() + .stream() + .filter(Objects::nonNull) + .filter(r -> !r.isBlank()) + .map(r -> XmlSerializationUtils.mapQualifier("refereed", r)) + .collect(Collectors.toList())); } if (instance.getProcessingchargeamount() != null - && isNotBlank(instance.getProcessingchargeamount().getValue())) { + && isNotBlank(instance.getProcessingchargeamount())) { fields .add( XmlSerializationUtils .asXmlElement( - "processingchargeamount", instance.getProcessingchargeamount().getValue())); + "processingchargeamount", instance.getProcessingchargeamount())); } if (instance.getProcessingchargecurrency() != null - && isNotBlank(instance.getProcessingchargecurrency().getValue())) { + && 
isNotBlank(instance.getProcessingchargecurrency())) { fields .add( XmlSerializationUtils .asXmlElement( - "processingchargecurrency", instance.getProcessingchargecurrency().getValue())); + "processingchargecurrency", instance.getProcessingchargecurrency())); + } + + if (instance.getLicense() != null) { + fields + .addAll( + instance + .getLicense() + .stream() + .filter(d -> isNotBlank(d)) + .map(d -> XmlSerializationUtils.asXmlElement("license", d)) + .collect(Collectors.toList())); } children .add( templateFactory - .getInstance(instance.getHostedby().getKey(), fields, instance.getUrl())); - } + .getInstance(fields, instance.getUrl())); + }); } final List ext = ((Result) entity).getExternalReference(); if (ext != null) { @@ -1234,6 +1272,110 @@ public class XmlRecordFactory implements Serializable { return children; } + private Stream groupInstancesByUrl(List instance) { + return instance + .stream() + .filter(i -> Objects.nonNull(i.getUrl())) + .map(i -> { + i + .setUrl( + i + .getUrl() + .stream() + .filter(this::isValidUrl) + .map(XmlRecordFactory::normalizeDoiUrl) + .collect(Collectors.toList())); + return i; + }) + .filter( + i -> Optional + .ofNullable(i.getUrl()) + .map(u -> !u.isEmpty()) + .orElse(false)) + .map(this::pickByUrl) + .collect(Collectors.groupingBy(ImmutablePair::getLeft)) + .values() + .stream() + .filter(Objects::nonNull) + .map(this::mergeInstances); + } + + public static String normalizeDoiUrl(String url) { + if (url.contains(DOI_ORG_AUTHORITY)) { + try { + URL u = new URL(url); + return new URL(HTTPS, DOI_ORG_AUTHORITY, u.getFile()).toString(); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + } + return url; + } + + private boolean isValidUrl(String url) { + try { + new URL(url).toURI(); + return true; + } catch (Exception e) { + return false; + } + } + + private ImmutablePair pickByUrl(Instance i) { + return new ImmutablePair<>(i.getUrl().get(0), i); + } + + private XmlInstance mergeInstances(List> instances) { + + 
final XmlInstance instance = new XmlInstance(); + + instance.setUrl(instances.get(0).getLeft()); + instance + .setAccessright( + instances + .stream() + .map(Pair::getValue) + .map(Instance::getAccessright) + .min(new AccessRightComparator()) + .orElse(XmlInstance.UNKNOWN_ACCESS_RIGHT)); + + instances.forEach(p -> { + final Instance i = p.getRight(); + instance.getCollectedfrom().add(i.getCollectedfrom()); + instance.getHostedby().add(i.getHostedby()); + instance.getInstancetype().add(i.getInstancetype()); + instance.getRefereed().add(i.getRefereed()); + instance + .setProcessingchargeamount( + Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null)); + instance + .setProcessingchargecurrency( + Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null)); + Optional + .ofNullable(i.getPid()) + .ifPresent(pid -> instance.getPid().addAll(pid)); + Optional + .ofNullable(i.getAlternateIdentifier()) + .ifPresent(altId -> instance.getAlternateIdentifier().addAll(altId)); + Optional + .ofNullable(i.getDateofacceptance()) + .ifPresent(d -> instance.getDateofacceptance().add(d.getValue())); + Optional + .ofNullable(i.getLicense()) + .ifPresent(license -> instance.getLicense().add(license.getValue())); + Optional + .ofNullable(i.getDistributionlocation()) + .ifPresent(dl -> instance.getDistributionlocation().add(dl)); + }); + + if (instance.getHostedby().size() > 1 + && instance.getHostedby().stream().anyMatch(hb -> ModelConstants.UNKNOWN_REPOSITORY.equals(hb))) { + instance.getHostedby().remove(ModelConstants.UNKNOWN_REPOSITORY); + } + + return instance; + } + private boolean isDuplicate(final RelatedEntityWrapper link) { return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType()); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/instance.st 
b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/instance.st index 64bed05b4..811d10936 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/instance.st +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/instance.st @@ -1,4 +1,4 @@ - + $metadata:{ it | $it$ }$ $webresources:{ it | $it$ }$ \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 64935e79d..86a155292 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -1,9 +1,14 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Arrays; +import java.util.List; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -109,6 +114,30 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + void testDoiUrlNormalization() throws MalformedURLException { + + // TODO add more test examples when needed + List urls = Arrays + .asList( + "https://dx.doi.org/10.1016/j.jas.2019.105013", + "http://dx.doi.org/10.13140/rg.2.2.26964.65927", + "https://dx.doi.org/10.13140/rg.2.2.26964.65927", + "http://dx.doi.org/10.1016/j.jas.2019.105013", + "http://hdl.handle.net/2072/369223", + "https://doi.org/10.1016/j.jas.2019.105013"); + + for (String url : urls) { + URL u = new URL(XmlRecordFactory.normalizeDoiUrl(url)); + if 
(url.contains(XmlRecordFactory.DOI_ORG_AUTHORITY)) { + assertEquals(XmlRecordFactory.HTTPS, u.getProtocol()); + assertEquals(XmlRecordFactory.DOI_ORG_AUTHORITY, u.getAuthority()); + } else { + assertEquals(url, u.toString()); + } + } + } + private void testRecordTransformation(final String record) throws IOException, TransformerException { final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 2b5e08e92..c1e0567e6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -12,7 +12,6 @@ import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.databind.DeserializationFeature; @@ -54,18 +53,22 @@ public class XmlRecordFactoryTest { System.out.println(doc.asXML()); - Assertions.assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); - Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); + assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); + assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); - Assertions.assertEquals("0000-0001-9613-9956", doc.valueOf("//creator[@rank = '2']/@orcid")); - Assertions.assertEquals("", doc.valueOf("//creator[@rank = '2']/@orcid_pending")); + assertEquals("0000-0001-9613-9956", 
doc.valueOf("//creator[@rank = '2']/@orcid")); + assertEquals("", doc.valueOf("//creator[@rank = '2']/@orcid_pending")); - Assertions.assertEquals("doi", doc.valueOf("//instance/pid/@classid")); - Assertions.assertEquals("10.1109/TED.2018.2853550", doc.valueOf("//instance/pid/text()")); + assertEquals("doi", doc.valueOf("//instance/pid/@classid")); + assertEquals("10.1109/TED.2018.2853550", doc.valueOf("//instance/pid/text()")); - Assertions.assertEquals("doi", doc.valueOf("//instance/alternateidentifier/@classid")); - Assertions.assertEquals("10.5689/LIB.2018.2853550", doc.valueOf("//instance/alternateidentifier/text()")); - // TODO add assertions based of values extracted from the XML record + assertEquals("doi", doc.valueOf("//instance/alternateidentifier/@classid")); + assertEquals("10.5689/LIB.2018.2853550", doc.valueOf("//instance/alternateidentifier/text()")); + + assertEquals(3, doc.selectNodes("//instance").size()); + + assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()")); + assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()")); } @Test @@ -96,7 +99,7 @@ public class XmlRecordFactoryTest { final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); - Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date")); + assertEquals("2021-01-01", doc.valueOf("//validated/@date")); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index 91f159853..3b5b54bbf 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -412,9 +412,800 @@ "value": "" }, "url": [ - "http://juuli.fi/Record/0331473718", "http://dx.doi.org/10.1109/TED.2018.2853550" 
] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "Closed Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______3456::b2b9ce8435390bcbfc05f3cae3948567", + "value": "A wonderful repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2020-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": 
"10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "CC-BY" + }, + "url": [ + "http://dx.doi.org/10.1109/TED.2018.2853550" + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": 
{ + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "http://dx.doi.org/10.1109/TED.2018.2853551" + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": 
"10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "http://dx.doi.org/10.1109/TED.2018.2853552", + "http://dx.doi.org/10.1109/TED.2018.2853554" + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + 
}, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + 
"dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + 
"hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "" + ] + }, + { + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "alternateIdentifier": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.5689/LIB.2018.2853550" + } + ], + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + 
"classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|od______2367::dfb9c4r4353ghjcbfbnhf3cyu79484rf", + "value": "Another repository" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c", + "value": "Unknown Repository" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "asdasd://not a URL" + ] } ], "journal": { @@ -864,5 +1655,37 @@ }, "value": "Understanding Electromigration in Cu-CNT Composite Interconnects A Multiscale Electrothermal Simulation Study" } - ] + ], + "processingchargeamount": { + "value": "1721.47", + "dataInfo": { + "invisible": true, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "processingchargecurrency": { + "value": "EUR", + "dataInfo": { + 
"invisible": true, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } } \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql index fc0162a9c..13a4803a9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql @@ -27,6 +27,22 @@ CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS SELECT * FROM ${external_stats_db_name}.licenses_normalized; +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +-- Usage statistics +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +create or replace view ${stats_db_name}.usage_stats as +select * from openaire_prod_usage_stats.usage_stats; + +create or replace view ${stats_db_name}.downloads_stats as +select * from openaire_prod_usage_stats.downloads_stats; + +create or replace view ${stats_db_name}.pageviews_stats as +select * from openaire_prod_usage_stats.pageviews_stats; + +create or replace view ${stats_db_name}.views_stats as +select * from openaire_prod_usage_stats.views_stats; ------------------------------------------------------------------------------------------------ 
------------------------------------------------------------------------------------------------ diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index e4e81175c..947c91072 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -14,7 +14,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -25,7 +25,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -36,7 +36,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -47,7 +47,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE 
d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS SELECT * FROM ${stats_db_name}.publication_sources @@ -67,4 +67,17 @@ from ( LATERAL VIEW explode(author) a as auth LATERAL VIEW explode(auth.pid) ap as auth_pid LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type - WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res \ No newline at end of file + WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; + +create table ${stats_db_name}.result_result stored as parquet as +select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype +from ${openaire_db_name}.relation rel +join ${openaire_db_name}.result r1 on rel.source=r1.id +join ${openaire_db_name}.result r2 on r2.id=rel.target +where reltype='resultResult' + and r1.resulttype.classname!=r2.resulttype.classname + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE + and r1.resulttype.classname != 'other' + and r2.resulttype.classname != 'other' + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql index 00a6913bc..e3a33a893 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -8,22 +8,22 @@ CREATE 
TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses @@ -46,7 +46,7 @@ FROM ( LEFT OUTER JOIN ( SELECT substr(d.id, 
4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 8e66e05c0..f46b65171 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -9,22 +9,22 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as 
select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as select * from ${stats_db_name}.publication_refereed diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql old mode 100644 new mode 100755 index 0ea4a5adc..9f11fa49d --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -1,28 +1,29 @@ +---- Sprint 1 ---- create table indi_pub_green_oa stored as parquet as -select distinct p.id, coalesce(green_oa, 0) as green_oa +select distinct p.id, coalesce(green_oa, 0) as green_oa from publication p -left outer join ( -select p.id, 1 as green_oa +left outer join ( +select p.id, 1 as green_oa from publication p join result_instance ri on ri.id = p.id join datasource on datasource.id = ri.hostedby where datasource.type like '%Repository%' -and (ri.accessright = 'Open Access' -or ri.accessright = 'Embargo')) tmp +and (ri.accessright = 'Open Access' +or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; create table indi_pub_grey_lit stored as parquet as select distinct p.id, coalesce(grey_lit, 0) as grey_lit from publication p left outer join ( -select p.id, 1 as grey_lit +select p.id, 1 as grey_lit from publication p join result_classifications rt on rt.id = p.id -where rt.type not in ('Article','Part of book or 
chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and +where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; create table indi_pub_doi_from_crossref stored as parquet as -select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref +select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref from publication p left outer join (select ri.id, 1 as doi_from_crossref from result_instance ri @@ -33,7 +34,7 @@ on tmp.id=p.id; create table indi_pub_gold_oa stored as parquet as select distinct p.id, coalesce(gold_oa, 0) as gold_oa from publication p -left outer join ( +left outer join ( select p.id, 1 as gold_oa from publication p join result_instance ri on ri.id = p.id @@ -41,226 +42,56 @@ join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp on p.id= tmp.id; -create table indi_project_pubs_count stored as parquet as -select pr.id id, count(p.id) total_pubs from project_results pr -join publication p on p.id=pr.result -group by pr.id; - -create table indi_project_datasets_count stored as parquet as -select pr.id id, count(d.id) total_datasets from project_results pr -join dataset d on d.id=pr.result -group by pr.id; - -create table indi_project_software_count stored as parquet as -select pr.id id, count(s.id) total_software from project_results pr -join software s on s.id=pr.result -group by pr.id; - -create table indi_project_otherresearch_count stored as parquet as -select pr.id id, count(o.id) total_other from project_results pr -join otherresearchproduct o on o.id=pr.result -group by pr.id; - -create table indi_pub_avg_year_country_oa stored as parquet as -select year, country, 
round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM publication p - join result_organization ro on p.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; - -create table indi_dataset_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM dataset d - join result_organization ro on d.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; - -create table indi_software_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM software s - join result_organization ro on s.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; - - -create table indi_other_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as 
averageNonOA - from - (SELECT year, country, SUM(CASE - WHEN bestlicence='Open Access' THEN 1 - ELSE 0 - END) AS OpenAccess, SUM(CASE - WHEN bestlicence<>'Open Access' THEN 1 - ELSE 0 - END) AS NonOpenAccess - FROM otherresearchproduct orp - join result_organization ro on orp.id=ro.id - join organization o on o.id=ro.organization - where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp; - -create table indi_pub_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join publication p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofpubs -from total; - -create table indi_dataset_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join dataset p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofdataset -from total; - -create table indi_software_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join software p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofsoftware -from total; - -create table indi_other_avg_year_context_oa stored as parquet as -with total as -(select count(distinct pc.id) no_of_pubs, 
year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc -join context c on pc.concept like concat('%',c.id,'%') -join otherresearchproduct p on p.id=pc.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by c.name, year ) -select year, name, round(no_of_pubs/total*100,3) averageofother -from total; - -create table indi_other_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from otherresearchproduct_datasources pd -join datasource d on datasource=d.id -join otherresearchproduct p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct -from total; - -create table indi_software_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from software_datasources pd -join datasource d on datasource=d.id -join software p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfSoftware -from total; - -create table indi_dataset_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total -from dataset_datasources pd -join datasource d on datasource=d.id -join dataset p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfDatasets -from total; - -create table indi_pub_avg_year_content_oa stored as parquet as -with total as -(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) 
over(PARTITION by year) as total -from publication_datasources pd -join datasource d on datasource=d.id -join publication p on p.id=pd.id -where cast(year as int)>=2003 and cast(year as int)<=2021 -group by d.type, year) -select year, type, round(no_of_pubs/total*100,3) averageOfPubs -from total; - -create table indi_pub_has_cc_licence stored as parquet as -select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license -from publication p -left outer join (select p.id, license.type as lic from publication p -join publication_licenses as license on license.id = p.id +---- Sprint 2 ---- +create table indi_result_has_cc_licence stored as parquet as +select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license +from result r +left outer join (select r.id, license.type as lic from result r +join result_licenses as license on license.id = r.id where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp -on p.id= tmp.id; +on r.id= tmp.id; -create table indi_pub_has_cc_licence_url stored as parquet as -select distinct p.id, (case when lic_host='' or lic_host is null then 0 else 1 end) as has_cc_license_url -from publication p -left outer join (select p.id, lower(parse_url(license.type, "HOST")) as lic_host -from publication p -join publication_licenses as license on license.id = p.id -WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp -on p.id= tmp.id; +create table indi_result_has_cc_licence_url stored as parquet as +select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url +from result r +left outer join (select r.id, lower(parse_url(license.type, "HOST")) as lic_host +from result r +join result_licenses as license on license.id = r.id +WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp +on r.id= tmp.id; create table indi_pub_has_abstract stored as parquet as select distinct 
publication.id, coalesce(abstract, 1) has_abstract from publication; -create table indi_with_orcid stored as parquet as +create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid -from result r -left outer join (select id, 1 as has_orcid from result_orcid) tmp -on r.id= tmp.id +from result r +left outer join (select id, 1 as has_orcid from result_orcid) tmp +on r.id= tmp.id; -create table indi_funded_result_with_fundref stored as parquet as + +---- Sprint 3 ---- +create table indi_funded_result_with_fundref stored as parquet as select distinct r.id, coalesce(fundref, 0) as fundref -from project_results r +from project_results r left outer join (select distinct id, 1 as fundref from project_results -where provenance='Harvested') tmp -on r.id= tmp.id +where provenance='Harvested') tmp +on r.id= tmp.id; -create table indi_result_org_country_collab stored as parquet as -with tmp as +create table indi_result_org_country_collab stored as parquet as +with tmp as (select o.id as id, o.country , ro.id as result,r.type from organization o join result_organization ro on o.id=ro.organization join result r on r.id=ro.id where o.country <> 'UNKNOWN') select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations from tmp as o1 join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country +where o1.id<>o2.id and o1.country<>o2.country +group by o1.id, o1.type,o2.country; -create table indi_result_org_collab stored as parquet as -with tmp as +create table indi_result_org_collab stored as parquet as +with tmp as (select o.id, ro.id as result,r.type from organization o join result_organization ro on o.id=ro.organization join result r on r.id=ro.id) @@ -268,15 +99,82 @@ select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaboratio from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.id<>o2.id -group by o1.id, 
o2.id, o1.type +group by o1.id, o2.id, o1.type; -create table indi_result_org_country_collab stored as parquet as -with tmp as -(select o.id as id, o.country , ro.id as result,r.type from organization o +create table indi_funder_country_collab stored as parquet as +with tmp as (select funder, project, country from organization_projects op +join organization o on o.id=op.id +join project p on p.id=op.project +where country <> 'UNKNOWN') +select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations +from tmp as f1 +join tmp as f2 on f1.project=f2.project +where f1.country<>f2.country +group by f1.funder, f2.country, f1.country; + +create table indi_result_country_collab stored as parquet as +with tmp as +(select country, ro.id as result,r.type from organization o join result_organization ro on o.id=ro.organization -join result r on r.id=ro.id where o.country <> 'UNKNOWN') -select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +join result r on r.id=ro.id) +select o1.country country1, o2.country country2, o1.type, count(distinct o1.result) as collaborations from tmp as o1 join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country +where o1.country<>o2.country +group by o1.country, o2.country, o1.type; + +---- Sprint 4 ---- +create table indi_pub_diamond stored as parquet as +select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal +from publication_datasources pd +left outer join ( +select pd.id, 1 as in_diamond_journal from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp +on pd.id=tmp.id; + +create table indi_pub_hybrid stored as parquet as +select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid 
+from publication_datasources pd +left outer join ( +select pd.id, 1 as is_hybrid from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp +on pd.id=tmp.id; + +create table indi_is_gold_oa stored as parquet as +(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa +from publication_datasources pd +left outer join ( +select pd.id, 1 as gold_oa from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) +where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp +on pd.id=tmp.id); + +create table indi_pub_in_transformative stored as parquet as +select distinct pd.id, coalesce(is_transformative, 0) as is_transformative +from publication pd +left outer join ( +select pd.id, 1 as is_transformative from publication_datasources pd +join datasource d on d.id=pd.datasource +join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +and ps.is_transformative_journal=true) tmp +on pd.id=tmp.id; + +create table indi_pub_closed_other_open stored as parquet as +select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri +left outer join +(select ri.id, 1 as pub_closed_other_open from result_instance ri +join publication p on p.id=ri.id +join datasource d on ri.hostedby=d.id +where d.type like '%Journal%' and ri.accessright='Closed Access' and +(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp +on tmp.id=ri.id; + + +---- Sprint 5 ---- +create table indi_result_no_of_copies stored as parquet as +select id, count(id) as number_of_copies from result_instance group by id; \ No newline at end of file diff --git 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index bb0d0ac6c..468a42045 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -38,13 +38,13 @@ SELECT substr(p.id, 4) as id, case when size(p.description) > 0 then true else false end as abstract, 'publication' as type from ${openaire_db_name}.publication p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_classifications AS SELECT substr(p.id, 4) as id, instancetype.classname as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_concepts AS SELECT substr(p.id, 4) as id, case @@ -53,45 +53,45 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_datasources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance - where p.datainfo.deletedbyinference = false) p + where 
p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_topics as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_citations AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and 
p.datainfo.deletedbyinference = false; \ No newline at end of file + and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index f4fb2a174..9e4edb44a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -49,8 +49,10 @@ compute stats TARGET.result_greenoa; create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_languages; -create table TARGET.result_licences stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.result_licences; +create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_licenses; + +create table TARGET.licenses_normalized as select * from SOURCE.licenses_normalized; create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_oids; @@ -79,6 +81,13 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; +create view TARGET.foo1 as select * from SOURCE.result_result rr 
where rr.source in (select id from TARGET.result); +create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); +create table TARGET.result_result as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +drop view TARGET.foo1; +drop view TARGET.foo2; +compute stats TARGET.result_result; + -- datasources create view if not exists TARGET.datasource as select * from SOURCE.datasource; create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids; @@ -100,28 +109,12 @@ create view if not exists TARGET.project as select * from SOURCE.project; create view if not exists TARGET.project_oids as select * from SOURCE.project_oids; create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations; create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount; +create view if not exists TARGET.project_classification as select * from SOURCE.project_classification; create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects; compute stats TARGET.project_results; -- indicators -create view TARGET.indi_dataset_avg_year_content_oa as select * from SOURCE.indi_dataset_avg_year_content_oa orig; -create view TARGET.indi_dataset_avg_year_context_oa as select * from SOURCE.indi_dataset_avg_year_context_oa orig; -create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig; - -create view TARGET.indi_other_avg_year_content_oa as select * from SOURCE.indi_other_avg_year_content_oa orig; -create view TARGET.indi_other_avg_year_context_oa as select * from SOURCE.indi_other_avg_year_context_oa orig; -create view TARGET.indi_other_avg_year_country_oa as select * from SOURCE.indi_other_avg_year_country_oa orig; - -create view TARGET.indi_project_datasets_count as select * from 
SOURCE.indi_project_datasets_count orig; -create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig; -create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig; -create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig; - -create view TARGET.indi_pub_avg_year_content_oa as select * from SOURCE.indi_pub_avg_year_content_oa orig; -create view TARGET.indi_pub_avg_year_context_oa as select * from SOURCE.indi_pub_avg_year_context_oa orig; -create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig; - create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_green_oa; create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); @@ -132,14 +125,31 @@ create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.i compute stats TARGET.indi_pub_gold_oa; create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_abstract; -create table TARGET.indi_pub_has_cc_licence stored as parquet as select * from SOURCE.indi_pub_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_has_cc_licence; -create table TARGET.indi_pub_has_cc_licence_url stored as parquet as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_has_cc_licence_url; +create table TARGET.indi_result_has_cc_licence stored as parquet as select * from 
SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_has_cc_licence; +create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_has_cc_licence_url; -create view TARGET.indi_software_avg_year_content_oa as select * from SOURCE.indi_software_avg_year_content_oa orig; -create view TARGET.indi_software_avg_year_context_oa as select * from SOURCE.indi_software_avg_year_context_oa orig; -create view TARGET.indi_software_avg_year_country_oa as select * from SOURCE.indi_software_avg_year_country_oa orig; +create table TARGET.indi_funder_country_collab stored as parquet as select * from SOURCE.indi_funder_country_collab; + +create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_with_orcid; +create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_funded_result_with_fundref; +create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_diamond; +create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_hybrid; +create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats
TARGET.indi_pub_in_transformative; +create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_closed_other_open; + +create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_no_of_copies; + +--- Usage statistics +create table TARGET.usage_stats stored as parquet as select * from SOURCE.usage_stats orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); --denorm alter table TARGET.result rename to TARGET.res_tmp; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql index 953eaad6a..76a5e5a48 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql @@ -38,20 +38,20 @@ SELECT substr(d.id, 4) AS id, CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract, 'dataset' AS type FROM ${openaire_db_name}.dataset d -WHERE d.datainfo.deletedbyinference = FALSE; +WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_citations AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and d.datainfo.deletedbyinference = false; + and d.datainfo.deletedbyinference = false and 
d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_concepts AS SELECT substr(p.id, 4) as id, case @@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_datasources AS SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource @@ -68,31 +68,31 @@ FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance) instances AS instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; CREATE TABLE ${stats_db_name}.dataset_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) 
oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql index 0210dc8cb..dc71f41f1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql @@ -38,20 +38,20 @@ SELECT substr(s.id, 4) as id, CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract, 'software' as type from ${openaire_db_name}.software s -where s.datainfo.deletedbyinference = false; +where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_citations AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, 
"//citation/id[@type='openaire']/@value") != "" - and s.datainfo.deletedbyinference = false; + and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_concepts AS SELECT substr(p.id, 4) as id, case @@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource @@ -68,31 +68,31 @@ FROM ( SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance) instances AS instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; CREATE TABLE ${stats_db_name}.software_languages AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE 
${stats_db_name}.software_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql index f7b302186..353aa98b7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql @@ -37,19 +37,19 @@ SELECT substr(o.id, 4) AS id, CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract, 'other' AS type FROM ${openaire_db_name}.otherresearchproduct o -WHERE o.datainfo.deletedbyinference = FALSE; +WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; -- Otherresearchproduct_citations CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, 
"//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and o.datainfo.deletedbyinference = false; + and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT substr(p.id, 4) as id, case @@ -57,33 +57,33 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN(SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and 
d.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 378e0f17b..b5e2eb37b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -5,24 +5,26 @@ 
------------------------------------------------------ CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid -FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids; +FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids +where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; + CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype = 'projectOrganization' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.project_results AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; create table ${stats_db_name}.project_classification as select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 from ${openaire_db_name}.project p lateral view explode(p.h2020classification) classifs as class -where p.datainfo.deletedbyinference=false and class.h2020programme is not null; +where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; CREATE TABLE ${stats_db_name}.project_tmp ( @@ -72,7 +74,7 @@ SELECT substr(p.id, 4) AS id, p.code.value AS code, p.totalcost AS totalcost FROM ${openaire_db_name}.project p -WHERE p.datainfo.deletedbyinference = false; +WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; create table ${stats_db_name}.funder as select distinct xpath_string(fund, '//funder/id') as id, diff --git 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index b3cbc9b41..a57966abf 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -127,7 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultOrganization' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.result_projects AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index 76d31eb5e..d3935fd4a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -44,7 +44,7 @@ FROM ${openaire_db_name}.datasource d1 LATERAL VIEW EXPLODE(originalid) temp AS originalidd WHERE originalidd like "piwik:%") AS d2 ON d1.id = d2.id -WHERE d1.datainfo.deletedbyinference = FALSE; +WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; -- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table. 
-- Creating a temporary dual table that will be removed after the following insert @@ -82,24 +82,25 @@ WHERE yearofvalidation = '-1'; CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) AS id, langs.languages AS language -FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages; +FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages +where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.datasource_oids AS SELECT substr(d.id, 4) AS id, oids.ids AS oid -FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids; +FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids +where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r -WHERE r.reltype = 'datasourceOrganization' - and r.datainfo.deletedbyinference = false; +WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; -- datasource sources: -- where the datasource info have been collected from. 
create table if not exists ${stats_db_name}.datasource_sources AS select substr(d.id, 4) as id, substr(cf.key, 4) as datasource from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf -where d.datainfo.deletedbyinference = false; +where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS SELECT datasource AS id, id AS result diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql index a1cb46185..c73aa811c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -9,7 +9,7 @@ SELECT substr(o.id, 4) as id, o.legalshortname.value as legalshortname, o.country.classid as country FROM ${openaire_db_name}.organization o -WHERE o.datainfo.deletedbyinference = FALSE; +WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS SELECT organization AS id, id AS datasource diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml new file mode 100644 index 000000000..82cf9d3d5 --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml @@ -0,0 +1,1350 @@ + +
+ + + + + +
+ + Graph processing [EXPERIMENT] + Data Provision + 30 + + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA/content + + + + + + + set blacklist of funder nsPrefixes from the beta aggregator + + nsPrefixBlacklist_BETA + gsrt________,rcuk________ + + + + + + + set blacklist of funder nsPrefixes from the production aggregator + + nsPrefixBlacklist_PROD + gsrt________,rcuk________,fct_________,nwo_________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + set the number of iterations in affiliation propagation + + iterations + 1 + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_experiment/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_experiment/graph/02_graph_raw + + + + + + + Set the target path to store the consistent graph cleaned + + cleanedFirstGraphPath + /tmp/beta_experiment/graph/03_graph_cleaned + + + + + + + Set the target path to store the RESOLVED graph + + resolvedGraphPath + /tmp/beta_experiment/graph/04_graph_resolved + + + + + + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_experiment/graph/05_graph_grouped + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/beta_experiment/graph/06_graph_inferred + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_experiment/graph/07_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_experiment/graph/08_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/beta_experiment/graph/09_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/beta_experiment/graph/10_graph_bulktagging + + + + + + + Set the target path to
store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/beta_experiment/graph/11_graph_affiliation + + + + + + + Set the target path to store the AFFILIATION from SEMANTIC RELATION graph + + affiliationSemRelGraphPath + /tmp/beta_experiment/graph/12_graph_affiliationsr + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/beta_experiment/graph/13_graph_community_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/beta_experiment/graph/14_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/beta_experiment/graph/15_graph_community_sem_rel + + + + + + + Set the target path to store the COUNTRY enriched graph + + countryGraphPath + /tmp/beta_experiment/graph/16_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_experiment/graph/17_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/beta_experiment/graph/18_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], 
"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + doiboost,orcidworks-no-doi,opencitations,h2020classification + + + + + + + declares the ActionSet ids to promote in the 
INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,iis-entities-software,iis-entities-patent + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? 
+ + shouldPatchRelations_PROD + false + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_experiment/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? 
+ + shouldPatchRelations_BETA + true + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_experiment/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : 'mongodb://beta.services.openaire.eu', + 'mongoDb' : 'mdstore', + 'mdstoreManagerUrl' : 'https://beta.services.openaire.eu/mdstoremanager', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.40:5432/oa_organizations', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'false', + 'workingDir' : '/tmp/beta_experiment/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 
'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : 'mongodb://services.openaire.eu', + 'mongoDb' : 'mdstore', + 'mdstoreManagerUrl' : 'https://services.openaire.eu/mdstoremanager', + 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.39:5432/oa_organizations', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_experiment/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 
'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_experiment/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/clean' + } + + build-report + + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'produceHBM', + 'hostedByMapPath' : '/user/dnet.beta/data/hostedByMap', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/hostedbymap/oozie_app', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G', + 'workingDir' : '/tmp/beta_experiment/working_dir/hostedbymap', + 'outputPath' : '/tmp/beta_experiment/working_dir/hostedbymap' + } + + build-report + + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + Group the resolved graph + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + 
} + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + + + + + + + prepare IIS action sets + + + [ + { + 'set' : 'iis-document-affiliation', + 'jobProperty' : 'export_action_set_id_matched_doc_organizations', + 'enablingProperty' : 'active_document_affiliation', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-projects-main', + 'jobProperty' : 'export_action_set_id_document_referencedProjects', + 'enablingProperty' : 'active_referenceextraction_project', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-datasets-main', + 'jobProperty' : 'export_action_set_id_document_referencedDatasets', + 'enablingProperty' : 'active_referenceextraction_dataset', + 'enabled' : 'true' + }, + { + 'set' : 'iis-researchinitiative', + 'jobProperty' : 'export_action_set_id_document_research_initiative', + 'enablingProperty' : 'active_referenceextraction_researchinitiative', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-similarities', + 'jobProperty' : 'export_action_set_id_document_similarities_standard', + 'enablingProperty' : 'active_documentssimilarity', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-classes', + 'jobProperty' : 'export_action_set_id_document_classes', + 'enablingProperty' : 'active_documentsclassification', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations', + 'jobProperty' : 'export_action_set_id_document_referencedDocuments', + 'enablingProperty' : 'active_citationmatching', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations-relations', + 'jobProperty' : 'export_action_set_id_citation_relations', + 'enablingProperty' : 'active_citationmatching_relations', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenceextraction-pdb', + 'jobProperty' : 'export_action_set_id_document_pdb', + 'enablingProperty' : 'active_referenceextraction_pdb', + 'enabled' : 
'true' + }, + { + 'set' : 'document_software_url', + 'jobProperty' : 'export_action_set_id_document_software_url', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-software', + 'jobProperty' : 'export_action_set_id_entity_software', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-communities', + 'jobProperty' : 'export_action_set_id_document_community', + 'enablingProperty' : 'active_referenceextraction_community', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-patents', + 'jobProperty' : 'export_action_set_id_document_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-patent', + 'jobProperty' : 'export_action_set_id_entity_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-covid-19', + 'jobProperty' : 'export_action_set_id_document_covid19', + 'enablingProperty' : 'active_referenceextraction_covid19', + 'enabled' : 'true' + } + ] + + + + + + + + prepare IIS parameters + + import_islookup_service_location + import_content_objectstores_csv + import_content_object_store_location + import_mdstore_service_location + import_dataset_mdstore_ids_csv + oozie.wf.application.path + /lib/iis/primary/snapshots/2021-12-09 + IIS + deprecated - not used + import_infospace_graph_location + + import_project_concepts_context_ids_csv + aginfra,beopen,citizen-science,clarin,covid-19,dariah,dh-ch,oa-pg,egi,elixir-gr,embrc,enermaps,epos,euromarine,fet-fp7,fet-h2020,fam,galaxy,gotriple,ifremer,inspired-ris,instruct,mes,neanias-underwater,ni,rda,science-innovation-policy,risis,rural-digital-europe,sdsn-gr,sobigdata + + + + + + + IIS main + + iisMainJobV3 + + { + 'cluster' : 'cluster', + 'oozie.wf.application.path' : 'oozie.wf.application.path', + + 'active_document_affiliation' : 'active_document_affiliation', + 
'active_referenceextraction_project' : 'active_referenceextraction_project', + 'active_referenceextraction_dataset' : 'active_referenceextraction_dataset', + 'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative', + 'active_documentsclassification' : 'active_documentsclassification', + 'active_documentssimilarity' : 'active_documentssimilarity', + 'active_citationmatching' : 'active_citationmatching', + 'active_citationmatching_relations' : 'active_citationmatching_relations', + 'active_referenceextraction_pdb' : 'active_referenceextraction_pdb', + 'active_referenceextraction_software_url' : 'active_referenceextraction_software_url', + 'active_referenceextraction_community' : 'active_referenceextraction_community', + 'active_referenceextraction_patent' : 'active_referenceextraction_patent', + 'active_referenceextraction_covid19' : 'active_referenceextraction_covid19', + + 'import_content_objectstores_csv' : 'import_content_objectstores_csv', + 'import_content_object_store_location' : 'import_content_object_store_location', + 'import_mdstore_service_location' : 'import_mdstore_service_location', + 'import_islookup_service_location' : 'import_islookup_service_location', + 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', + 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', + 'import_infospace_graph_location' : 'groupedGraphPath', + + 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', + 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', + 'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects', + 'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative', + 'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard', + + 
'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments', + 'export_action_set_id_citation_relations' : 'export_action_set_id_citation_relations', + 'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb', + 'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url', + 'export_action_set_id_entity_software' : 'export_action_set_id_entity_software', + 'export_action_set_id_document_community' : 'export_action_set_id_document_community', + 'export_action_set_id_document_patent' : 'export_action_set_id_document_patent', + 'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent', + 'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19', + 'export_action_set_id_document_classes' : 'export_action_set_id_document_classes' + } + + + { + 'import_mdstore_service_location' : 'http://services.openaire.eu:8280/is/services/mdStore', + 'import_content_object_store_location' : 'http://services.openaire.eu:8280/is/services/objectStore', + 'import_islookup_service_location' : 'http://services.openaire.eu:8280/is/services/isLookUp', + 'import_content_objectstores_csv': 
'258755af-0b48-41ee-9652-939c5bd2fca3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ad5f567-386d-4812-8edb-c0922eacd107_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f3b32c75-9077-4788-83ee-c8451215043c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7e780872-eff7-4a03-b5a2-13ad69c01366_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0a0fcd82-ae5c-4a10-af47-5106e881b639_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c16030ef-917e-462a-8414-2a8e2fac7619_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8f3fdd6e-a2cd-49ad-ba93-73c8184190bf_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,909b5ec2-6859-4acf-a86e-22e31933392b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cea77c2a-feba-4c02-ac3c-d51a159ec904_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2db71b6a-4838-4c25-9883-d4689148bb2a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8d7df838-62c9-4adc-8cd3-f7e84f732ff4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f90b105b-d12e-4061-99d3-0fdb85d10258_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e74e2c5-4844-40a4-b85f-b3ef920612b9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ddd7b3cc-01c2-4512-9550-52b2446f7dfa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2a09fb42-38f0-4a7a-a95d-97d81f716e77_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,83b7328f-2ab7-44eb-b213-2d770c4f074e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,99d7d7f0-4f76-4cb5-9b22-3ee8ec28b9c9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4a484614-fd61-4ca5-b520-610335065fa2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,32a76449-27eb-4b5f-b0e9-11cd9af0e035_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9d22aa91-83c6-44d4-b614-943d855734a5_T2JqZWN0U3RvcmVEU1Jlc291
cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,71ddf676-f6a4-48c9-9d1d-4ca6742e7316_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d1f4546f-e47c-47c7-b19c-6334633b5f7e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,03ebfd29-88a6-4757-9d05-64ff5018b91a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ba437ca2-8071-4835-9615-a14eb1fb9ff8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3cf2373a-8228-4781-b0f2-9b4343487fc9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7818e488-0ba4-4558-8f76-83cacf2f7358_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fcfcbfd8-4e7a-46a8-a5ff-281c5fa11767_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7b988985-c798-44f1-bff1-5574d2abe8af_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,24d97463-144b-4bb5-b6b6-7f1f5b014833_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5b84e8f6-3ab9-40ad-be6f-7b60b24b9e42_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a8c1d0ec-9f3e-42ba-8748-f9b5e62eb7a1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3cd6626e-8e0c-49ae-89a7-5e1286bddf8e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,097344eb-7312-4096-ab9d-204c6fd8395e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,42e09e1f-352b-4ba4-98e9-0826635009d9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,792e67b2-29ed-40ac-a406-61726f5921ab_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f78c1ada-b747-4862-a68b-45d61613199f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5707d04f-96b0-4e59-8607-5d095a3bd301_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8fce096c-c0d8-410d-9f07-a93ac87cf2a6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2f817fb6-3b23-474a-b137-0c4739c264b6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3a5fb37f-8d49-4432-a2
94-538af185ac88_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0b17e450-8f41-4edc-8d08-68f644a41c3e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cedeffa5-70ac-4415-9640-fcc997f36aa9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ce93c295-ca6d-48e7-9a28-eaf26cf57cb3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,62e02764-705a-471d-8057-359b0ed3c904_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ac5b5231-e600-43ad-8f91-16a91695d81f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,27c2d995-e379-422a-9e4c-12999d6c9846_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,990226a9-28f8-49da-be60-806e1a09addc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5e940a8b-88df-496e-a856-aa0a949effe4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,73979dd9-ccc7-495f-87ab-be0a6be4fcf0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b8eee4a1-1163-4b02-86b9-13443da6dc14_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6af002f3-cb24-4086-b912-56583e4e9bd5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,75f3089e-1cfd-40f0-bdfd-88c80e5d53f2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d794e42c-93fc-47ce-8fe8-e5e40220f770_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3bb435b7-2cec-43f1-b19e-89656d68b1b5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,82563c20-9d34-4b42-8b18-af629d483133_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,30a18588-80c2-4c10-ac6a-7823a47bbea4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,74373d10-aa93-4a81-b4a7-928801073f3d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6002b6ef-1cb3-4277-8e6b-50ca1c6b2bc0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bc097067-e9c6-44ef-9251-1dc1a9f4dd0a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZU
RTUmVzb3VyY2VUeXBl,70d8e385-b668-43ac-9b08-a0e2aa6b0b3b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f3550954-6dd4-4f70-b2b2-fa332f9cf0f9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c3fd0f58-164c-420e-a0c1-45cd5de7dc8f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f8df0721-fbec-4d2c-be7f-9570406adcac_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2a51aa1c-16b2-43e4-bb84-ee353ddf77df_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f68c874d-dd8e-4006-ac73-094f6179d0ca_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,81f561d7-1008-4e85-8f49-3c862cc5257d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7f30c022-1bb8-4752-81cb-2ce948353930_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,178a2b94-08e6-44c3-b8ba-e69329681e2b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3b37b1bc-64d5-4d41-b9e2-63234421d098_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a88973fd-62ce-4284-a56f-c5197123d8f9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e9f27bb4-0180-483b-903f-44358856aec7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,75b967d0-92a9-4a02-96e6-2ccfd39f5305_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ca01e02c-630c-4e69-b9c1-28e3196dd383_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,212aec8e-a1c0-4e82-b46c-5dce0feaa560_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,75b66f48-57f6-4920-9493-b667be192659_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fcd93176-ec23-4676-9149-3fe525435943_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5c75e6ff-31b0-41f7-b73c-bee9adf3fa60_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3ed071cf-f00f-4d9d-a0ba-8b50eaebeb2c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ea1c9a3d-369c-4658-b003-51d864c26659_T2JqZW
N0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b203ea92-1a60-41c9-98bd-6f9f6d5a5f03_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,df08df2b-e184-4b58-8c7b-13ee24caf292_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1b7cee3d-4eb7-48d2-bd92-8166bc6bd61f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,521bc388-5d9d-43ae-9650-15767fae368e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,192bf304-42b0-4f77-93da-ecc2cb2e9c7a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ae6ac56-a0dc-4014-ad35-fa6b060458df_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,217a415a-3b7c-4aa2-a501-8f68c8fa094a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7387fc00-5cbb-417d-a334-f6ae17c1d0d7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b558b01d-e6b8-47fd-92bc-0a11d6e0ae19_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d5cd3825-141d-4c8e-bb6f-a507d81532dc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,36ff4b61-e1f1-4aa9-8149-3cc3f4259308_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,34fdb421-8142-42ee-96b1-1a3ed8850561_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b102bbb2-5197-4773-bb85-adb5f019d53e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,50789366-2c51-4caf-a0ea-1ae9466aad00_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3053ae8a-1a6b-4e4a-966f-968cdd982830_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9c86d5e7-14f6-42f4-9e00-860f235e5572_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7e04231c-9f98-4fcd-9659-865c692138f8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,343a909a-21f4-4daa-9e1a-761c2e7ee2b1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,389567f2-c640-40ce-a14d-089557398f26_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1f0
8eaaa-f898-4c2c-a828-f0f7ce458835_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6802f369-db89-4528-b5ec-f163b29ad804_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,45767cd1-d258-44c8-9498-48b63c836ea0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7b135c9a-7516-4095-bca0-6cd09029ee25_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8c2c2853-b126-45b4-b7c4-27a53490ec1d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,88e4d72b-81a6-4b4d-93a7-57dbe6315fc0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cb8e86be-c771-4060-a7c0-87ea34056bc7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,39513d48-6f56-44a5-9df7-252897877baa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,80d6cbdd-2068-46a7-aae4-5b7978f34840_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,172957e8-5e2b-4ef7-be9f-d53159580fbc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,23638864-2e5e-448d-bd43-ddc78ed00129_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,268c97ff-4374-4720-bee6-8ac03f934093_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,05c9ad76-ac3d-418e-88fb-0ebd28b865c7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3cab6d9f-12d4-427e-baa4-992ff1e89d96_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,85624194-48c2-4f78-9dac-bd97afcdac97_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,333f85b4-4334-4eee-a137-41a889033f46_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,00299a55-13ef-4aca-a759-22982c0cae59_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9dce6dcf-dbea-4d9c-a3cf-d0c0c732d863_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f9a0d2cb-2657-4370-bbcb-0c7634a8d462_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5193e9c6-766b-456f-a5f3-6fedbdd2bbad_T2JqZWN0U3RvcmVEU1Jlc291cmNl
cy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,06f8b1d5-7e21-4ce2-abce-3baa5ac840be_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c5b6d4a2-892e-4ceb-a39c-ceb841739d9e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,10ecbc8b-ea58-4133-9189-896f732114f4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,881431d5-50c0-4983-bedf-09ba7b80543d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f3bc9690-d4ac-4c28-9356-bfbafea2b9f6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a3e22dcd-214b-4204-9de4-9d19e16e9dbf_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a065ba53-325a-4f0e-ac1c-1322b5f2b52f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,31ffcce8-2ba5-4f60-bf59-48f008d82ef3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2f440571-f9e9-4599-b952-446f7ab24682_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ec705b5f-f361-4a6f-8817-ef8c027c3c19_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fccd2c7c-a889-4030-b827-7bda953b90e6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b0b1a376-7c0d-438e-b1b3-86660668e11b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,acb7a4f3-abf5-4cc7-a1fc-2d27882c2e06_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,823addf2-fcc1-4007-96e8-d11806c1cffe_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0ce49a90-d900-4be5-bec6-2c64c4131817_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,79126722-17e9-4e3c-b726-cd2d010c97b9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3789780d-5fa3-4480-95b3-1a83fc0af151_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,879bbbfc-2d87-479c-a81d-8750954f0445_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,07e10331-ed9f-4173-baeb-510381815081_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1bfc3890-b54a-4072-90e6-9
3ba1a2da3ad_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1c85cbda-5e2e-4db9-ba93-408b84c90bd8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f9c06301-29c4-4559-8bf9-220173597a7a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e0aec0ac-d954-4bae-827f-e8908c5fbd4b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,91dde45b-158c-4e81-b7e1-108dc68dcf55_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,877113b2-affd-4a70-b67b-fe9b555399ff_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,16887dca-84c7-49f6-a352-5cb9a80e1e24_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b10c7251-0b77-4a9e-8ffd-4af48f66b6ee_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2563b673-7db2-476c-ab72-1538aebbc528_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d29af09f-6d1d-4afb-9409-d2b001bfeee1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,276c2099-31b2-4413-ad7f-efb8ae3b7f0f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6e2e600f-614c-4405-b93d-6206630a5771_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ab5b738-5a98-4dc1-a69e-bf42270defb4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ce6b6847-01eb-407f-bcff-e215702be3cd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c1e0ee7c-58ce-4430-8ffe-dc80c5c4ba8c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,af0edc23-2ba1-4fb9-a3b4-9349d2ddb09a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e416ca69-df34-424e-8589-4c2c6973dcbd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,98f665d7-662f-4a87-8b44-cb0bcc40ceb3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,67550112-8af0-4b99-ad43-1c484811b986_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,00846284-de91-42a6-9bbc-0cb5c574ea44_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUm
Vzb3VyY2VUeXBl,6e3b2a99-a96f-4cbb-af06-36f6402ff8dd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,01919ec6-2a78-48b0-8ad0-e68f395d28ff_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,02297d7f-96c1-4191-89d6-fc5a01754c66_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6df51c9f-618e-433e-8db9-ccb302f8985f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c4e0a3a4-9c00-4625-bb0d-595d217d9159_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d7fac7fc-01b4-4f1f-a751-cf108bbe8662_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4153debc-f82a-4fc6-814a-4b47d8252eec_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,96990cb0-046f-47df-bd03-af19ed27f8c5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,60c83aed-9dc7-4f15-8cc5-25cdfcb67078_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,92f9e507-a0cb-4e40-93e8-73af84ae9d61_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c97e4d00-7c5f-49ab-b562-4e136e07de56_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c2457314-928e-4919-b82b-922d8449f3ef_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7ddd9cfe-ddcf-40ac-abfb-90f47f8f40b8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,99535b31-190b-474a-8597-1b2b89e7509a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a8ddcc4d-3b31-40ed-8126-781698bcdfd8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7735efdd-9bd4-428b-964f-e1ebabc30641_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9e5c6bc9-8e0f-491c-9f61-81a180dd7ee9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,40519d2f-4bcd-40ac-88af-106be7bc3955_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,dad827f3-5e0c-4ab2-b12b-178dceec35d5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,dffd4e39-ace7-4640-9b32-51d41c42ee35_T2JqZWN0U3
RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8bc564b6-c341-4245-b409-01b17bf9b0c6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cfcbca42-1a98-407d-aea2-f4c6d412b634_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6d277175-f0e9-4838-a4c0-17b6067ec061_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,245a0fc3-d074-4c7f-9351-ee61aa35ddb1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,66883fbd-e658-4b39-ab3d-8d08e27789e1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8fa62afa-53f5-4d1d-b05f-773c62fc7b87_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d45be243-514a-473f-9648-ef696e41b6d5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6a9b4f86-cdd1-4fa2-ae0f-71a8bd22661d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fb62b38b-25b6-404e-8636-956de83c5394_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f05821b6-b895-47df-bbbc-a0412abefa32_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,55c0f5c6-d4e8-47b0-a093-0ed389da1d33_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e4d01f17-5209-4bed-8e49-bdc0a39985d6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9f562259-c706-4c32-beb5-deb7a5aa449b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9704d935-077e-4235-ac73-75b1eb7662a1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,392c57e8-c9ff-4fb1-84b4-6e97d586b229_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4915b64b-cbbe-4b67-a4f1-7e35cb30ad8a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0db9404c-0222-42aa-bd84-aeac28e6a50c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,579a8c3a-d5de-4ac9-97c0-1e8decf4f781_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,76fb60c2-9e3b-4506-8011-bb14229ec67e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,19afa84
2-8f2e-4bb1-860d-f39eccd96b18_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c80b36d7-0945-4a75-a299-32c83893d19b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f89add62-eb50-401b-90a8-9441f105ca17_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,214a7942-8ff7-43c2-91be-32e6ebb67488_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f476dd49-671a-44bd-9b57-d3f6b3b15d1f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b928b5aa-3e69-43db-805f-1250abaf0456_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0271e5a0-b086-4f92-b0c8-89d7066add6b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,90e71a3b-9ea9-47be-bbf9-9a9df6b4747f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9b6b02f9-ca49-4896-bebf-d263297ab230_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6301df3f-2da8-4f16-b01c-be02340d9713_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,df85bb2c-7c83-4747-bfae-b1fd20dbc001_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0765fc2e-65e1-4510-bebe-7c9276221340_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7a39bcf1-c5e8-4eea-aadb-a2b4c43a159c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e6478ddc-5290-492d-a571-6a2e621a06c6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f0211b1c-9a1c-40f2-85e8-5e22b1d29dc9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9517683c-c63e-4914-88e8-70127e57fab8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7a705f7c-9252-4724-a3de-ead9413e68f0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,abc008fb-4648-4b92-9213-559f609e9e8c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e82a5c42-798c-4314-8d07-e51b975f9167_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c8309a3f-c374-4974-96cc-3bab9107cf36_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9P
YmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7ab041de-fac8-4914-add0-cbd1c5fabbdd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4a7ff1b1-43a3-4204-a026-3d7ac5aa3935_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,44fcd8ab-1b6b-4642-b1ba-f0cb9d5e1896_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,947d5bd8-2a5b-497f-bfff-26df1ca121cc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,14d784ac-b5ee-48c2-a360-4a4d9ec3acb9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,afd138ca-c90d-4220-82fc-2baf12364bdc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,213aad7f-e540-4eb6-b262-9c30cd83e848_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3b6082c5-2489-4209-ab3a-9bd8ea9be071_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2c111602-4bf5-49c3-8ad1-cc0db22242e1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bab223d1-1845-4a12-b345-8a26765c4213_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,97fa0e7e-f45b-4338-b983-726eaa7c364b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,55703b4b-7a6e-4088-ac81-5d43b6d19b33_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,03b240d3-fb71-4474-adab-ab48e7e2cc15_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a9279aea-743e-4890-b520-4f430d04716c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9528eee4-d154-4bf0-adc3-e7412d543da0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,58f3d18e-ef7c-4b21-9816-36b3f6beba55_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cfa16256-66b8-47cf-9279-b733da0f79a0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e2ea667-9887-414b-96f6-979eb76b35b4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,59afef00-c880-4e9b-88ca-8c2379b117ae_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,09685143-e732-4ce5-bcb5-4f491
7f0200c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bb924eae-dc4d-474b-947b-3476acef5f27_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e1fe865-eaf7-4d09-8817-e53b2727917b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f98ee989-651a-4e7c-b092-9bcfb95529e0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,66f1c2b1-15aa-4edb-ab32-f49ce982b306_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6ba83161-8935-4c33-9dd5-4fff396a217f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,93c22207-5753-42eb-a479-b971a80b3137_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c4e74e75-9f6e-4b1d-bc01-878b823a6b76_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,26755930-7ed3-417c-81ac-bf93fbe51e57_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a770df9c-1ff2-4922-87af-d0d769378c2e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d3ffb188-6155-4f23-8666-2ae6d171f3e9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,65c55bd6-158a-4753-8aa9-03b4574f93ce_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,42d3fa69-20b5-44f3-a1b8-5f8828bbb9c9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f015f8a5-ee1c-439a-b545-8e50d45e95bc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,04a1a309-bc60-4f2d-9e5e-5d852a9ffa63_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4e1e3d94-79c5-4c3f-ac6b-e9bff29c4f0b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,55c3d882-5e9f-4db6-99b0-6b20ef388a69_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e5ab2419-aa6a-4222-9c49-393d9823f6d6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,71a6d543-9b9b-4e23-8e05-8e8d81f0fd7e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4cc9528a-882d-4cdd-b43e-eb120b0050b2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3
VyY2VUeXBl,70799645-2100-44d8-a37a-a1ef6115a9a8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,26964221-d113-4c24-ac92-4e870f647dd1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,abd48a12-9e16-46ca-a1db-0cd30e3899bc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0e768d9c-0e6d-4ead-846a-dc732624be65_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5ebea5b7-4667-4c77-b34b-1ec2fdb41e87_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fcd35a53-64c6-47e6-a9ab-8b0c56aff4ef_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,593f01e4-86b2-4e38-a9d9-5c617703fe97_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,620fa76b-9033-418d-996e-d2da91f36e4d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,96ce7f94-4ecd-42e1-a96f-16d5eb4cfbb2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b965d1a7-d911-428e-98cf-de78a25b4cf6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,39f07181-4e67-4a0b-aa13-a625789834fa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1353dd10-b088-4ae0-9426-dd09a54af7cb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ab5aed4-b8eb-455b-b8ad-55566c9592d7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9ba73764-bb52-44c0-b5eb-f272b89aa2f5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8fb38f2d-8a73-4572-9694-72d85478c6f3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,65b6f6e0-c5f9-45f6-8cdb-2fa892cf0b31_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3fa68746-d403-45b0-9440-3820ab2abbe9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,68ffa111-c0ef-411b-9fd5-e8ac0bee1771_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d16b49af-3113-4a4c-a709-e860c8810eaa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c31e4305-5d8c-473e-b4e9-bad6af132857_T2JqZWN0U3Rvcm
VEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,773a1004-6aec-4196-98f6-135a08c6fe74_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fbdab11a-70b7-46dc-91e4-9228cb9d15e6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c590e698-eebf-448b-9cce-a7cce759a2eb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fc0cf5ba-1294-4d15-9671-620d0f50c80e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0327e908-54f3-470d-9300-fbf9d58fcfc8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,554b305d-6112-446a-8de1-24da5da45847_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b3fb2842-a6ba-4208-8203-78b8bf6b380b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a87e2d54-4801-425a-a2ff-085d199d2165_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e5390536-1946-421f-8427-fe1440403ea5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7475a216-56da-44c7-92f6-ff1368ff8167_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9795c4f7-21c4-4876-9ab7-d1c717fc2985_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,50ca9829-300a-4bf6-96e5-33f1fb81d4d9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3b2ce259-00a1-4fd2-b4d5-8e1a6915cced_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e43a557e-56d2-4d4d-8fbe-dd54140842d6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1eb5e1ee-ef00-446f-bf8e-e6e86d1c0911_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9f0b8522-96bf-41eb-b3af-703530077f72_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,522e4345-f690-4b76-8ef2-fbce695debfa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f77c9673-1b3e-416d-b935-e0b0cade1ddb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7fcb1dcb-8cb3-40f0-a965-dcbfa6cdce0c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e7accb29-16
24-4731-8ada-666be2bc9ca4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3f92004a-6fc8-4a31-a427-6a7035d9cf02_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,13b4cf1a-37e2-4cdd-b08d-14352cef6ba7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9174da20-ff65-4875-8a36-7bb201a71ac3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3bcea802-91c8-418c-9f1a-c136ed4513cd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,32c27158-2a5f-4ce2-acf8-be57fe7087fe_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c924ea03-1721-4db9-a432-bf917b18e57e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6325fe16-5f6d-45fa-ad4b-83ace4b371a7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d53d61a3-be3b-4a59-9051-3372768ebb7b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cc8f9cce-b55f-4d54-8e65-8481fe6c1f1e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9c73db00-24fe-4d1b-8099-18f47089acf1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,37fc21cb-d23e-45e5-9959-82135d658ba6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4ebf4c19-0401-4d2f-b578-692c6962840b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,18e76570-2d08-4457-bf8a-c3a8d14fe346_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fd0b3cb1-bfc4-4679-a320-2277c2cf7181_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,01336277-976b-41c4-92b6-507f3716fa7b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cde297e1-54ee-44f5-99ad-23938b40ef89_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e42e429-23ce-42fc-be9a-77b99d742cd2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9b7eb529-d04e-4301-9078-fb02b79ee6e7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ef21449b-bde0-46f1-91a0-720a2c9921bb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmpl
Y3RTdG9yZURTUmVzb3VyY2VUeXBl,fa856b69-08e0-4e99-8317-b2abdf007444_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e557faae-63d7-4439-b32b-d11cdedd1820_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a749af27-1fdd-4a6f-99f4-dbb9f05ba51b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,54a3ee50-46fc-492e-83f8-e3be5cf34a8f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,68ff4d3c-e1c5-443a-a575-6ba47c77c517_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,af10fbe8-8608-4801-890c-f6c01efbe0bb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0d0901b3-daeb-4b16-8915-fdf6fc0d0521_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f1a05918-9c80-4591-b889-a338feedd0d4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,627f58cb-59ee-4846-991a-45e16c7cc3b8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fe58f033-823e-473d-8234-8ac5dc22f347_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2a93d54b-2c70-4f8e-935e-ffea440a4324_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,633f943b-f8a5-4702-8bc2-39df981ace6f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,82e48289-28f3-4fca-89fe-ca0655b8db87_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2f0894c1-2c82-4ebf-b8d6-07987369ece7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f63e4258-a3fb-4383-ad43-d415ca032b2a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1568c709-b306-46df-8630-73d7aab57a9c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8f4bdef7-6301-44e3-884f-1e037ee4ea4c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d504551c-e608-43d2-be64-1c30067d3396_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,684c4d40-8f74-4965-a18c-8204c6f2d466_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,54e0a050-e716-40ba-bc97-ae80d0df9
c8e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0a443e4c-8b18-4f6c-99f9-9fddabd13986_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3e1838b1-3485-421f-8806-57e14f46f00f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7737dd48-a89a-4fe7-ba95-29445391baed_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d4cb47b7-ac3f-4034-bc7f-021b62ee7612_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,09f6b0dd-fdf5-4674-888e-5e75291a4181_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fb45b9f6-e3fb-440e-9459-b15df356372b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a355978e-abc2-4f1a-83f5-20e8d4189e91_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,aa11b8f2-1c33-404c-8227-1d27fa948a01_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,53a09ffb-b8ef-4be9-9363-6940aadbf414_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e29fc1b0-db4d-4f89-88d1-bdecef46546f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7157d5dd-464a-4143-941a-c99d1ed25886_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f9a1f806-0ced-4b22-9b33-6e52296f47d9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,208c6e8c-ad86-4e69-9805-8342de977835_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,682feb2e-563c-407a-9480-32a260c819b3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b9ae064d-e76a-44e6-aab5-2c0de2d4aea7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9be76f8a-ce7a-4d15-bd3b-2d182a265c1a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7b5e7343-be40-4505-844a-7cd763358215_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,57b8dfa5-154a-4e6e-8409-5de1e4d3e12e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,df135a7e-1cf6-487e-b231-d0130cbb3e94_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2
VUeXBl,99641569-0e73-482b-a4b9-c880ec2ca281_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,68c903e3-c0a5-4cc0-bec1-8f86f98b78ba_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,972456e7-148a-4849-8d1a-33d12ee57fac_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5f5a112c-c38d-4973-acf1-4aefbb6608cd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2cbead31-3ad5-4533-97a9-57169ae2d18d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,67098888-0feb-41f6-9dc0-49b11436eef3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1e28984d-b120-45b0-ad4e-91b563186e2f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,50a2a72b-ea21-4ee8-8956-0d35ae1eb2e2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,07413001-9767-4a53-94bc-d911ad6bf9fa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6f93032a-936f-4f70-8e6c-cd4935dce538_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,492066de-7eda-4ea2-bed7-84f498138b82_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,be1b0d20-b721-4d91-beb4-b34a2c30ea92_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ef1b59bd-5245-4e24-ab34-6bcbdedc69a7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,02fa6adf-4cc6-4f99-8368-08da9b78885a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cd6e7a8d-468b-43c1-95ae-8d869691787f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ce7127d6-99fd-42be-b86c-1fcc9c1dc0c6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a96337dc-fa05-4968-9ba0-33a981278e37_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,542e43fb-2a43-4185-b308-5967ad9d9ac2_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,5137ca31-2ac8-41e9-ada9-4dc20f3839bf_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,36618a76-f6a9-45e9-a097-1b50a236afe6_T2JqZWN0U3RvcmVEU1
Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,06f1c21c-9509-49e9-8032-7f83e426df55_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,058a8510-05ea-4d44-a12d-91b759ff50dd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bee33dd6-dea6-4a82-a30c-362adb0045c1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,736fb544-c839-48d1-bcb6-aac4c0b8f995_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a98acfba-1f27-448b-a625-42263b985cdd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,71de8115-0290-42f9-8101-fd01953bb93b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2913194c-f136-429c-8414-9f06156334e5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,44e6daf0-db3f-4210-92f7-8e39054584b1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,63140bbb-ad8a-429c-b66c-9f08a8cc2d6b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4630a2a0-69ef-4558-896f-402d4820aa1f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9a04b295-8738-40ee-a9a9-155abe853cd4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b0159d0d-42e9-4fee-9e88-3711bfa5180d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,1f295a0c-1ca6-40b3-844e-cdff57520cc6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2890a3d5-0b79-48b9-874f-f157911b7bda_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f4f56e08-58d6-4bfa-a747-68f0ab436afb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,612889ab-8e79-465a-9e76-ff881f74932c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,0df9c398-9133-41ca-a8d8-6dd1df3475c8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,bffbeb98-b8b1-4186-9ebe-440a3533a5d7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6d4119b2-8139-4a60-9080-cb5dc22b58ef_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,600ae3a4-d4fc-4
146-9002-d225ffdb25aa_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9b1acfe1-28e1-4d8d-935d-f99d1cd23c8b_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d2d82597-b8a5-4ee7-a3e2-16df33095de9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,81174703-1336-408b-b740-1566e032df53_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,747cece6-1b2b-47b6-850a-143b92b034ea_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b5f5c68d-d68d-4b53-8c2c-8daa765dc252_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d74fbcd9-6662-49c5-a809-9d892ae17c37_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d19bdb3a-07eb-497b-af16-0700a2ac0bad_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2ff2d259-85ca-4227-a581-4270d1e2188f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,153fdbf9-5594-4425-a05c-5e804c01ecfb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,d23e27c8-4696-4c59-9db4-e670acc8b922_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,76b04d5f-68c8-41d1-91f5-bd5e683c69b8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,fcc406ac-80a4-436c-af81-da5659b0ba65_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,14c0d716-4b07-4861-8108-8a98ee259f5c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,8b81d43a-88c4-47dd-af1f-a31795429ffb_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,03bdbb2f-95d8-40c2-be26-06b3153849a1_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,25828ad6-ce19-4869-ab00-f50c8af89912_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f043d6bf-1e27-4700-a099-bd6e1718bfc3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ed98162b-984d-48b5-9069-4e4676c220dd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,74cf3342-d8ff-44c4-9b6c-82f32804fbb5_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RT
dG9yZURTUmVzb3VyY2VUeXBl,06857595-9a09-4ba9-b520-03962f211782_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a98cfd91-a351-49cf-a901-2b731aed3d17_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e2d0c4bf-c496-4637-9b36-c9989a8b26e0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ddbd3cb5-b244-425a-a975-94fb8eba5a85_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a710a888-42f2-4885-bfe7-dfb95b62494e_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e7cba260-b7dc-43b8-922f-5686a7daa57a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,7b3ce677-ce27-4a26-b08a-361054209447_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,be4040a8-1ab6-4e07-bfe6-b054fe1e26d3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e33a1a7e-9a38-451a-b1b7-a7677059e691_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a855a42c-f254-4749-a2e5-1053d5518ea9_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f6af618a-c667-4b57-b67f-947b0f0e5956_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,f68904d3-50f9-424f-a6a8-d643886d01f4_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,28ea697d-8e0c-417d-bd46-f0088c160bf0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,6d74a029-4569-44ec-9e70-68d78cb7386c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2d8a6b94-7bd2-4f80-b60c-5a9f4aafda2c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,62f6b1f4-e06b-4106-bf34-fbaa637b5edd_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,cefb2ed0-5f56-4884-9444-5343e6ff248c_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3da72380-7057-489e-88a9-77160abc124d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2bc98bbc-66e3-4d7d-b113-619cc4ff6ff7_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,af213802-4a8f-44d8-8a9a-db80e39ecb74_
T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e43c85ac-14e8-4a08-b575-ce9220cd448a_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,9279d3f2-270b-4d23-a653-02573d4a1e85_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,ARCS3PDF-0e73-473b-8502-71793ea55769_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,2dca6fc4-fdfb-42f6-a18c-bfa6b1101ad6_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,4a4cf004-7f89-4362-869c-2b3fbb0c5266_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,18857150-a6f4-4315-8296-bd15ee8b0445_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,646d810f-4100-45da-96b6-47be0c3258ce_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,3239c2d2-7efe-43bd-8c5f-29e3c5f0b81f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b8ff4cbc-a503-427d-bd4f-9ddf74210440_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,681407d2-10e2-41ce-87aa-d96c55c41743_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,82007298-16a8-41a0-876c-a018e24c40b0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,e558703f-7d1c-45b4-b322-79fa83fc6b19_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,01d78f0a-4f3c-4c14-b47e-8a602640e04f_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,915a35fb-8be9-47d9-92b0-130d7303df59_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,c449e02d-cc9a-4737-b20c-fb5f8ba0fbfc_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,62d58ec0-78bf-408b-b6a3-8f0a1477e2a8_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl' + } + + false + build-report + + + + + + + update IIS action sets + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'groupedGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 
'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_experiment/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'inferredGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_experiment/working_dir/dedup', + 'whiteListPath' : '/data/dedup/whitelist_beta', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G', + 'sparkDriverMemory' : '6G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_experiment/working_dir/dedup' + } + + build-report + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : 
'/lib/dnet/BETA/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/orcid', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/bulktag' + } + + build-report + + + + + + + creates relationships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + creates relationships between results and organizations when the organizations are in parent/child relationships.
The children to the parents + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationGraphPath', + 'outputPath': 'affiliationSemRelGraphPath', + 'iterations':'iterations' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation_semrel/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/affiliationsemrel' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationSemRelGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + created relation between projects and results linked to other results through allowedsemrel semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/funding', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tag as belonging to communities result in allowedsemrels relation with other result already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/community_semrel', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' :
'true' + } + + build-report + + + + + + + associated to results collected from allowedtypes and those in the whitelist the country of the organization(s) handling the datasource it is collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/beta_experiment/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 'whitelist':'10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/clean' + } + + build-report + + + + + + + removes
blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/blacklist', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20220111_200505_785 + 2022-01-11T20:08:53+00:00 + + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml index 2fed35f44..df9528f4c 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -11,6 +11,16 @@ IIS 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA/content + + + + + set blacklist of funder nsPrefixes from the beta aggregator @@ -71,11 +81,31 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/04_graph_resolved + + + + + + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_inference/graph/05_graph_grouped + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_inference/graph/04_graph_dedup + /tmp/beta_inference/graph/06_graph_dedup @@ -85,7 +115,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_inference/graph/05_graph_consistent + /tmp/beta_inference/graph/07_graph_consistent @@ -95,7 +125,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_inference/graph/06_graph_cleaned + /tmp/beta_inference/graph/08_graph_cleaned @@ -548,6 +578,55 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + Resolve Relation + + executeOozieJob + IIS + + 
{ + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -560,7 +639,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'groupedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml index e5ce3d710..0ea6be341 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml @@ -11,6 +11,16 @@ IIS 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_PROD/content + + + + + set blacklist of funder nsPrefixes @@ -61,11 +71,21 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/03_graph_resolved + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/prod_inference/graph/03_graph_dedup + /tmp/prod_inference/graph/04_graph_dedup @@ -75,7 +95,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/prod_inference/graph/04_graph_consistent + /tmp/prod_inference/graph/05_graph_consistent @@ -85,7 +105,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/prod_inference/graph/05_graph_cleaned + /tmp/prod_inference/graph/06_graph_cleaned @@ -347,6 +367,31 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 
'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + @@ -359,7 +404,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'resolvedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index f83337b3c..73c44aba8 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -11,6 +11,16 @@ Data Provision 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA/content + + + + + set blacklist of funder nsPrefixes from the beta aggregator @@ -71,11 +81,31 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_provision/graph/04_graph_resolved + + + + + + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_provision/graph/05_graph_grouped + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_provision/graph/04_graph_dedup + /tmp/beta_provision/graph/06_graph_dedup @@ -85,7 +115,7 @@ Set the target path to store the INFERRED graph inferredGraphPath - /tmp/beta_provision/graph/05_graph_inferred + /tmp/beta_provision/graph/07_graph_inferred @@ -95,7 +125,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_provision/graph/06_graph_consistent + /tmp/beta_provision/graph/08_graph_consistent @@ -105,7 +135,7 @@ Set the target path to store the ORCID enriched graph 
orcidGraphPath - /tmp/beta_provision/graph/07_graph_orcid + /tmp/beta_provision/graph/09_graph_orcid @@ -115,7 +145,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_provision/graph/08_graph_bulktagging + /tmp/beta_provision/graph/10_graph_bulktagging @@ -125,7 +155,17 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_provision/graph/09_graph_affiliation + /tmp/beta_provision/graph/11_graph_affiliation + + + + + + + Set the target path to store the AFFILIATION from SEMATIC RELATION graph + + affiliationSemRelGraphPath + /tmp/beta_provision/graph/12_graph_affiliationsr @@ -135,7 +175,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_provision/graph/10_graph_comunity_organization + /tmp/beta_provision/graph/13_graph_comunity_organization @@ -145,7 +185,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_provision/graph/11_graph_funding + /tmp/beta_provision/graph/14_graph_funding @@ -155,7 +195,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/beta_provision/graph/12_graph_comunity_sem_rel + /tmp/beta_provision/graph/15_graph_comunity_sem_rel @@ -165,7 +205,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_provision/graph/13_graph_country + /tmp/beta_provision/graph/16_graph_country @@ -175,7 +215,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_provision/graph/14_graph_cleaned + /tmp/beta_provision/graph/17_graph_cleaned @@ -185,7 +225,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_provision/graph/15_graph_blacklisted + /tmp/beta_provision/graph/18_graph_blacklisted @@ -695,6 +735,55 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 
'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -707,7 +796,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'groupedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml index be6155f2f..205db29a6 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml @@ -11,6 +11,16 @@ Data Provision 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_PROD/content + + + + + set blacklist of funder nsPrefixes @@ -51,11 +61,21 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/03_graph_resolved + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/prod_provision/graph/03_graph_dedup + /tmp/prod_provision/graph/04_graph_dedup @@ -65,7 +85,7 @@ Set the target path to store the INFERRED graph inferredGraphPath - /tmp/prod_provision/graph/04_graph_inferred + 
/tmp/prod_provision/graph/05_graph_inferred @@ -75,7 +95,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/prod_provision/graph/05_graph_consistent + /tmp/prod_provision/graph/06_graph_consistent @@ -85,7 +105,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/prod_provision/graph/06_graph_orcid + /tmp/prod_provision/graph/07_graph_orcid @@ -95,7 +115,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/prod_provision/graph/07_graph_bulktagging + /tmp/prod_provision/graph/08_graph_bulktagging @@ -105,7 +125,7 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/prod_provision/graph/08_graph_affiliation + /tmp/prod_provision/graph/09_graph_affiliation @@ -115,7 +135,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/prod_provision/graph/09_graph_comunity_organization + /tmp/prod_provision/graph/10_graph_comunity_organization @@ -125,7 +145,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/prod_provision/graph/10_graph_funding + /tmp/prod_provision/graph/11_graph_funding @@ -135,7 +155,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/prod_provision/graph/11_graph_comunity_sem_rel + /tmp/prod_provision/graph/12_graph_comunity_sem_rel @@ -145,7 +165,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/prod_provision/graph/12_graph_country + /tmp/prod_provision/graph/13_graph_country @@ -155,7 +175,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/prod_provision/graph/13_graph_cleaned + /tmp/prod_provision/graph/14_graph_cleaned @@ -165,7 +185,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/prod_provision/graph/14_graph_blacklisted + 
/tmp/prod_provision/graph/15_graph_blacklisted @@ -446,6 +466,59 @@ build-report + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'prepareInfo', + 'hostedByMapPath' : '/user/dnet.production/data/hostedByMap', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hostedbymap/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/hostedbymap', + 'outputPath' : '/tmp/prod_provision/working_dir/hostedbymap', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + Graph resolution + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/relation_resolution', + 'shouldResolveEntities' : 'false', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '9G' + } + + @@ -458,7 +531,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'resolvedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 22ee77619..143178560 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -15,6 +15,13 @@ This module is the container for the oozie workflow definitions in dnet-hadoop project + + + DHPSite + ${dhp.site.stage.path}/dhp-workflows + + + dhp-workflow-profiles dhp-aggregation @@ -37,7 +44,7 @@ iis-releases iis releases plugin repository - http://maven.ceon.pl/artifactory/iis-releases + https://maven.ceon.pl/artifactory/iis-releases default diff --git a/dhp-workflows/src/site/site.xml b/dhp-workflows/src/site/site.xml new file mode 100644 index 000000000..6b742db6a --- /dev/null +++ 
b/dhp-workflows/src/site/site.xml @@ -0,0 +1,25 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 71c55d1f0..603a4cf1e 100644 --- a/pom.xml +++ b/pom.xml @@ -620,6 +620,18 @@ + + org.antipathy + mvn-scalafmt_2.11 + 1.0.1640073709.733712b + + + eu.dnetlib.dhp + dhp-code-style + ${project.version} + + + @@ -665,6 +677,33 @@ + + org.antipathy + mvn-scalafmt_2.11 + + https://code-repo.d4science.org/D-Net/dnet-hadoop/raw/branch/beta/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf + false + false + + ${project.basedir}/src/main/scala + + + ${project.basedir}/src/test/scala + + false + false + : git rev-parse --abbrev-ref HEAD + false + + + + validate + + format + + + + org.apache.maven.plugins maven-release-plugin @@ -719,6 +758,10 @@ dnet45-releases https://maven.d4science.org/nexus/content/repositories/dnet45-releases + + DHPSite + ${dhp.site.stage.path}/ + @@ -734,6 +777,7 @@ + sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop UTF-8 UTF-8 3.6.0 @@ -753,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.8.22] + [2.10.29] [4.0.3] [6.0.5] [3.1.6] diff --git a/src/site/site.xml b/src/site/site.xml new file mode 100644 index 000000000..634a2c154 --- /dev/null +++ b/src/site/site.xml @@ -0,0 +1,21 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + + + + + \ No newline at end of file