diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/HashableStructuredProperty.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/HashableStructuredProperty.java new file mode 100644 index 000000000..aa60b097a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/HashableStructuredProperty.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2024. + * SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package eu.dnetlib.dhp.schema.oaf; + +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; + +public class HashableStructuredProperty extends StructuredProperty { + + private static final long serialVersionUID = 8371670185221126045L; + + public static HashableStructuredProperty newInstance(String value, Qualifier qualifier, DataInfo dataInfo) { + if (value == null) { + return null; + } + final HashableStructuredProperty sp = new HashableStructuredProperty(); + sp.setValue(value); + sp.setQualifier(qualifier); + sp.setDataInfo(dataInfo); + return sp; + } + + public static HashableStructuredProperty newInstance(StructuredProperty sp) { + HashableStructuredProperty hsp = new HashableStructuredProperty(); + hsp.setQualifier(sp.getQualifier()); + hsp.setValue(sp.getValue()); + hsp.setQualifier(sp.getQualifier()); + return hsp; + } + + public static StructuredProperty toStructuredProperty(HashableStructuredProperty hsp) { + StructuredProperty sp = new StructuredProperty(); + sp.setQualifier(hsp.getQualifier()); + sp.setValue(hsp.getValue()); + sp.setQualifier(hsp.getQualifier()); + return sp; + } + + @Override + public int hashCode() { + return new HashCodeBuilder(11, 91) + .append(getQualifier().getClassid()) + .append(getQualifier().getSchemeid()) + .append(getValue()) + .hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj == this) { + return true; + } + if (obj.getClass() != getClass()) { + return false; + } + final HashableStructuredProperty rhs = (HashableStructuredProperty) obj; + return new EqualsBuilder() + .append(getQualifier().getClassid(), rhs.getQualifier().getClassid()) + .append(getQualifier().getSchemeid(), rhs.getQualifier().getSchemeid()) + .append(getValue(), rhs.getValue()) + .isEquals(); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java index c0ef339bd..49b30897c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java @@ -43,34 +43,4 @@ public class CleaningFunctions { return !PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue); } - /** - * Utility method that normalises PID values on a per-type basis. - * @param pid the PID whose value will be normalised. - * @return the PID containing the normalised value. - */ - public static StructuredProperty normalizePidValue(StructuredProperty pid) { - pid - .setValue( - normalizePidValue( - pid.getQualifier().getClassid(), - pid.getValue())); - - return pid; - } - - public static String normalizePidValue(String pidType, String pidValue) { - String value = Optional - .ofNullable(pidValue) - .map(String::trim) - .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); - - switch (pidType) { - - // TODO add cleaning for more PID types as needed - case "doi": - return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX); - } - return value; - } - } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/DoiCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/DoiCleaningRule.java index 43fb0feda..cd8957764 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/DoiCleaningRule.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/DoiCleaningRule.java @@ -6,18 +6,11 @@ import org.apache.commons.lang3.StringUtils; public class DoiCleaningRule { public static String clean(final String doi) { - return doi - .toLowerCase() - .replaceAll("\\s", "") - .replaceAll("^doi:", "") - .replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX); - } - - public static String normalizeDoi(final String input) { - if (input == null) + if (doi == null) return null; - final String replaced = input + final String replaced = doi .replaceAll("\\n|\\r|\\t|\\s", "") + .replaceAll("^doi:", "") .toLowerCase() .replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX); if (StringUtils.isEmpty(replaced)) @@ -32,7 +25,6 @@ public class DoiCleaningRule { return null; return ret; - } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 2be4e8e0c..dfa9c5ad0 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -563,12 +563,24 @@ public class GraphCleaningFunctions extends CleaningFunctions { Optional .ofNullable(i.getPid()) .ifPresent(pid -> { - final Set pids = Sets.newHashSet(pid); + final Set pids = pid + .stream() + .map(HashableStructuredProperty::newInstance) + .collect(Collectors.toCollection(HashSet::new)); Optional .ofNullable(i.getAlternateIdentifier()) .ifPresent(altId -> { - final Set altIds = Sets.newHashSet(altId); - i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids))); + final Set altIds = altId + .stream() + .map(HashableStructuredProperty::newInstance) + .collect(Collectors.toCollection(HashSet::new)); + i + .setAlternateIdentifier( + Sets + .difference(altIds, pids) + .stream() + .map(HashableStructuredProperty::toStructuredProperty) + .collect(Collectors.toList())); }); }); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index 1d61c87df..2c77c3b37 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -175,7 +175,7 @@ public class IdentifierFactory implements Serializable { return entity .getPid() .stream() - .map(CleaningFunctions::normalizePidValue) + .map(PidCleaner::normalizePidValue) .filter(CleaningFunctions::pidFilter) .collect( Collectors @@ -207,7 +207,7 @@ public class IdentifierFactory implements Serializable { // filter away PIDs provided by a DS that is not considered an authority for the // given PID Type .filter(p -> shouldFilterPidByCriteria(collectedFrom, p, mapHandles)) - .map(CleaningFunctions::normalizePidValue) + .map(PidCleaner::normalizePidValue) .filter(p -> isNotFromDelegatedAuthority(collectedFrom, p)) .filter(CleaningFunctions::pidFilter)) .orElse(Stream.empty()); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index ac7694d18..e01813110 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -972,7 +972,7 @@ public class MergeUtils { private static String extractKeyFromPid(final StructuredProperty pid) { if (pid == null) return null; - final StructuredProperty normalizedPid = CleaningFunctions.normalizePidValue(pid); + final StructuredProperty normalizedPid = PidCleaner.normalizePidValue(pid); return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue()); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java index 0e2083590..1e591ce72 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java @@ -18,8 +18,8 @@ public class PidValueComparator implements Comparator { if (right == null) return -1; - StructuredProperty l = CleaningFunctions.normalizePidValue(left); - StructuredProperty r = CleaningFunctions.normalizePidValue(right); + StructuredProperty l = PidCleaner.normalizePidValue(left); + StructuredProperty r = PidCleaner.normalizePidValue(right); return Optional .ofNullable(l.getValue()) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index e10d0c500..77453e5f0 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -28,6 +28,7 @@ import com.jayway.jsonpath.JsonPath; import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; import net.minidev.json.JSONArray; import scala.collection.JavaConverters; import scala.collection.Seq; @@ -104,7 +105,7 @@ public class DHPUtils { public static String generateUnresolvedIdentifier(final String pid, final String pidType) { - final String cleanedPid = CleaningFunctions.normalizePidValue(pidType, pid); + final String cleanedPid = PidCleaner.normalizePidValue(pidType, pid); return String.format("unresolved::%s::%s", cleanedPid, pidType.toLowerCase().trim()); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java index bce4b76b5..9cc0698a4 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java @@ -29,7 +29,7 @@ class IdentifierFactoryTest { "publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true); verifyIdentifier( - "publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true); + "publication_doi3.json", "50|pmc_________::e2a339e0e11bfbf55462e14a07f1b304", true); verifyIdentifier( "publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true); @@ -41,7 +41,7 @@ class IdentifierFactoryTest { "publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true); verifyIdentifier( - "publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true); + "publication_pmc2.json", "50|pmc_________::e2a339e0e11bfbf55462e14a07f1b304", true); verifyIdentifier( "publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true); diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json index b1ea01f60..303c023be 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json @@ -29,7 +29,7 @@ }, { "qualifier": {"classid": "pmc"}, - "value": "21459329" + "value": "PMC21459329" } ] } diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json index e7d49eebb..3b7b6590e 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json @@ -13,7 +13,7 @@ }, { "qualifier":{"classid":"pmc"}, - "value":"21459329" + "value":"PMC21459329" } ] } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java index 0ff03f5e1..0973fdf1e 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java @@ -48,7 +48,7 @@ public class TreeNodeDef implements Serializable { // function for the evaluation of the node public TreeNodeStats evaluate(Row doc1, Row doc2, Config conf) { - TreeNodeStats stats = new TreeNodeStats(ignoreUndefined); + TreeNodeStats stats = new TreeNodeStats(); // for each field in the node, it computes the for (FieldConf fieldConf : fields) { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java index f6b210a8c..2b96048b4 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java @@ -9,11 +9,8 @@ public class TreeNodeStats implements Serializable { private Map results; // this is an accumulator for the results of the node - private final boolean ignoreUndefined; - - public TreeNodeStats(boolean ignoreUndefined) { + public TreeNodeStats() { this.results = new HashMap<>(); - this.ignoreUndefined = ignoreUndefined; } public Map getResults() { @@ -25,10 +22,7 @@ public class TreeNodeStats implements Serializable { } public int fieldsCount() { - if (ignoreUndefined) - return this.results.size(); - else - return this.results.size() - undefinedCount(); // do not count undefined + return this.results.size(); } public int undefinedCount() { @@ -84,22 +78,11 @@ public class TreeNodeStats implements Serializable { double min = 100.0; // random high value for (FieldStats fs : this.results.values()) { if (fs.getResult() < min) { - if (fs.getResult() == -1) { - if (fs.isCountIfUndefined()) { - min = 0.0; - } else { - min = -1; - } - } else { + if (fs.getResult() >= 0.0 || (fs.getResult() == -1 && fs.isCountIfUndefined())) min = fs.getResult(); - } } } - if (ignoreUndefined) { - return min == -1.0 ? 0.0 : min; - } else { - return min; - } + return min; } // if at least one is true, return 1.0 @@ -108,11 +91,7 @@ public class TreeNodeStats implements Serializable { if (fieldStats.getResult() >= fieldStats.getThreshold()) return 1.0; } - if (!ignoreUndefined && undefinedCount() > 0) { - return -1.0; - } else { - return 0.0; - } + return 0.0; } // if at least one is false, return 0.0 @@ -121,7 +100,7 @@ public class TreeNodeStats implements Serializable { if (fieldStats.getResult() == -1) { if (fieldStats.isCountIfUndefined()) - return ignoreUndefined ? 0.0 : -1.0; + return 0.0; } else { if (fieldStats.getResult() < fieldStats.getThreshold()) return 0.0; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java index 28b3a82af..263504dbb 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java @@ -44,10 +44,12 @@ public class TreeProcessor { TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config); treeStats.addNodeStats(nextNodeName, stats); - double finalScore = stats.getFinalScore(currentNode.getAggregation()); - if (finalScore == -1.0) + // if ignoreUndefined=false the miss is considered as undefined + if (!currentNode.isIgnoreUndefined() && stats.undefinedCount() > 0) { nextNodeName = currentNode.getUndefined(); - else if (finalScore >= currentNode.getThreshold()) { + } + // if ignoreUndefined=true the miss is ignored and the score computed anyway + else if (stats.getFinalScore(currentNode.getAggregation()) >= currentNode.getThreshold()) { nextNodeName = currentNode.getPositive(); } else { nextNodeName = currentNode.getNegative(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java index 70ca1576c..028fa47dc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java @@ -10,7 +10,6 @@ import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.BZip2Codec; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; @@ -29,6 +28,7 @@ import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.DoiCleaningRule; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; @@ -46,6 +46,8 @@ public class PrepareAffiliationRelations implements Serializable { public static final String BIP_INFERENCE_PROVENANCE = "openaire:affiliation"; public static final String OPENAIRE_DATASOURCE_ID = "10|infrastruct_::f66f1bd369679b5b077dcdf006089556"; public static final String OPENAIRE_DATASOURCE_NAME = "OpenAIRE"; + public static final String DOI_URL_PREFIX = "https://doi.org/"; + public static final int DOI_URL_PREFIX_LENGTH = 16; public static void main(String[] args) throws Exception { @@ -98,35 +100,26 @@ public class PrepareAffiliationRelations implements Serializable { private static void createActionSet(SparkSession spark, String crossrefInputPath, String pubmedInputPath, String openapcInputPath, String dataciteInputPath, String webcrawlInputPath, String publisherlInputPath, String outputPath) { - List collectedFromCrossref = OafMapperUtils - .listKeyValues(ModelConstants.CROSSREF_ID, "Crossref"); - JavaPairRDD crossrefRelations = prepareAffiliationRelations( - spark, crossrefInputPath, collectedFromCrossref); + List collectedfromOpenAIRE = OafMapperUtils + .listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME); + + JavaPairRDD crossrefRelations = prepareAffiliationRelationsNewModel( + spark, crossrefInputPath, collectedfromOpenAIRE); - List collectedFromPubmed = OafMapperUtils - .listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed"); JavaPairRDD pubmedRelations = prepareAffiliationRelations( - spark, pubmedInputPath, collectedFromPubmed); + spark, pubmedInputPath, collectedfromOpenAIRE); - List collectedFromOpenAPC = OafMapperUtils - .listKeyValues(ModelConstants.OPEN_APC_ID, "OpenAPC"); - JavaPairRDD openAPCRelations = prepareAffiliationRelations( - spark, openapcInputPath, collectedFromOpenAPC); + JavaPairRDD openAPCRelations = prepareAffiliationRelationsNewModel( + spark, openapcInputPath, collectedfromOpenAIRE); - List collectedFromDatacite = OafMapperUtils - .listKeyValues(ModelConstants.DATACITE_ID, "Datacite"); JavaPairRDD dataciteRelations = prepareAffiliationRelations( - spark, dataciteInputPath, collectedFromDatacite); + spark, dataciteInputPath, collectedfromOpenAIRE); - List collectedFromWebCrawl = OafMapperUtils - .listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME); JavaPairRDD webCrawlRelations = prepareAffiliationRelations( - spark, webcrawlInputPath, collectedFromWebCrawl); + spark, webcrawlInputPath, collectedfromOpenAIRE); - List collectedfromPublisher = OafMapperUtils - .listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME); JavaPairRDD publisherRelations = prepareAffiliationRelationFromPublisher( - spark, publisherlInputPath, collectedfromPublisher); + spark, publisherlInputPath, collectedfromOpenAIRE); crossrefRelations .union(pubmedRelations) @@ -138,6 +131,21 @@ public class PrepareAffiliationRelations implements Serializable { outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class); } + private static JavaPairRDD prepareAffiliationRelationFromPublisherNewModel(SparkSession spark, + String inputPath, + List collectedfrom) { + + Dataset df = spark + .read() + .schema( + "`DOI` STRING, `Organizations` ARRAY>") + .json(inputPath) + .where("DOI is not null"); + + return getTextTextJavaPairRDD(collectedfrom, df.selectExpr("DOI", "Organizations as Matchings")); + + } + private static JavaPairRDD prepareAffiliationRelationFromPublisher(SparkSession spark, String inputPath, List collectedfrom) { @@ -165,6 +173,20 @@ public class PrepareAffiliationRelations implements Serializable { return getTextTextJavaPairRDD(collectedfrom, df); } + private static JavaPairRDD prepareAffiliationRelationsNewModel(SparkSession spark, + String inputPath, + List collectedfrom) { + // load and parse affiliation relations from HDFS + Dataset df = spark + .read() + .schema( + "`DOI` STRING, `Matchings` ARRAY>") + .json(inputPath) + .where("DOI is not null"); + + return getTextTextJavaPairRDDNew(collectedfrom, df); + } + private static JavaPairRDD getTextTextJavaPairRDD(List collectedfrom, Dataset df) { // unroll nested arrays df = df @@ -181,7 +203,7 @@ public class PrepareAffiliationRelations implements Serializable { // DOI to OpenAIRE id final String paperId = ID_PREFIX - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", row.getAs("doi"))); + + IdentifierFactory.md5(DoiCleaningRule.clean(removePrefix(row.getAs("doi")))); // ROR id to OpenAIRE id final String affId = GenerateRorActionSetJob.calculateOpenaireId(row.getAs("rorid")); @@ -213,6 +235,69 @@ public class PrepareAffiliationRelations implements Serializable { new Text(OBJECT_MAPPER.writeValueAsString(aa)))); } + private static JavaPairRDD getTextTextJavaPairRDDNew(List collectedfrom, Dataset df) { + // unroll nested arrays + df = df + .withColumn("matching", functions.explode(new Column("Matchings"))) + .select( + new Column("DOI").as("doi"), + new Column("matching.PID").as("pidtype"), + new Column("matching.Value").as("pidvalue"), + new Column("matching.Confidence").as("confidence"), + new Column("matching.Status").as("status")) + .where("status = 'active'"); + + // prepare action sets for affiliation relations + return df + .toJavaRDD() + .flatMap((FlatMapFunction) row -> { + + // DOI to OpenAIRE id + final String paperId = ID_PREFIX + + IdentifierFactory.md5(DoiCleaningRule.clean(removePrefix(row.getAs("doi")))); + + // Organization to OpenAIRE identifier + String affId = null; + if (row.getAs("pidtype").equals("ROR")) + // ROR id to OpenIARE id + affId = GenerateRorActionSetJob.calculateOpenaireId(row.getAs("pidvalue")); + else + // getting the OpenOrgs identifier for the organization + affId = row.getAs("pidvalue"); + + Qualifier qualifier = OafMapperUtils + .qualifier( + BIP_AFFILIATIONS_CLASSID, + BIP_AFFILIATIONS_CLASSNAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS); + + // format data info; setting `confidence` into relation's `trust` + DataInfo dataInfo = OafMapperUtils + .dataInfo( + false, + BIP_INFERENCE_PROVENANCE, + true, + false, + qualifier, + Double.toString(row.getAs("confidence"))); + + // return bi-directional relations + return getAffiliationRelationPair(paperId, affId, collectedfrom, dataInfo).iterator(); + + }) + .map(p -> new AtomicAction(Relation.class, p)) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))); + } + + private static String removePrefix(String doi) { + if (doi.startsWith(DOI_URL_PREFIX)) + return doi.substring(DOI_URL_PREFIX_LENGTH); + return doi; + } + private static List getAffiliationRelationPair(String paperId, String affId, List collectedfrom, DataInfo dataInfo) { return Arrays diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index 1d5b35cff..897153762 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -112,7 +112,7 @@ public class SparkAtomicActionUsageJob implements Serializable { .joinWith(datasource, resultModel.col("datasourceId").equalTo(datasource.col("id")), "left") .map((MapFunction, UsageStatsResultModel>) t2 -> { UsageStatsResultModel usrm = t2._1(); - if(Optional.ofNullable(t2._2()).isPresent()) + if (Optional.ofNullable(t2._2()).isPresent()) usrm.setDatasourceId(usrm.getDatasourceId() + "||" + t2._2().getOfficialname().getValue()); else usrm.setDatasourceId(usrm.getDatasourceId() + "||NO_MATCH_FOUND"); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json index 4d85cf26b..b3d1d742b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json @@ -28,13 +28,19 @@ "paramLongName": "dataciteInputPath", "paramDescription": "the path to get the input data from Datacite", "paramRequired": true - },{ + }, + { "paramName": "wip", "paramLongName": "webCrawlInputPath", "paramDescription": "the path to get the input data from Web Crawl", "paramRequired": true -} -, + }, + { + "paramName": "pub", + "paramLongName": "publisherInputPath", + "paramDescription": "the path to get the input data from publishers", + "paramRequired": true + }, { "paramName": "o", "paramLongName": "outputPath", diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala index 4bd6bcc09..7c45234f6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala @@ -332,7 +332,7 @@ case object Crossref2Oaf { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats //MAPPING Crossref DOI into PID - val doi: String = DoiCleaningRule.normalizeDoi((json \ "DOI").extract[String]) + val doi: String = DoiCleaningRule.clean((json \ "DOI").extract[String]) result.setPid( List( structuredProperty( @@ -673,7 +673,7 @@ case object Crossref2Oaf { val doi = input.getString(0) val rorId = input.getString(1) - val pubId = s"50|${PidType.doi.toString.padTo(12, "_")}::${DoiCleaningRule.normalizeDoi(doi)}" + val pubId = s"50|${PidType.doi.toString.padTo(12, "_")}::${DoiCleaningRule.clean(doi)}" val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId) val r: Relation = new Relation diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 7603715b9..9f448d48b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -407,10 +407,9 @@ object DataciteToOAFTransformation { ) } if (c.affiliation.isDefined) - a.setAffiliation( + a.setRawAffiliationString( c.affiliation.get .filter(af => af.nonEmpty) - .map(af => OafMapperUtils.field(af, dataInfo)) .asJava ) a.setRank(idx + 1) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java index ac9977a7e..179cbecb5 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java @@ -28,8 +28,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; public class PrepareAffiliationRelationsTest { @@ -39,8 +39,7 @@ public class PrepareAffiliationRelationsTest { private static Path workingDir; private static final String ID_PREFIX = "50|doi_________::"; - private static final Logger log = LoggerFactory - .getLogger(PrepareAffiliationRelationsTest.class); + private static final Logger log = LoggerFactory.getLogger(PrepareAffiliationRelationsTest.class); @BeforeAll public static void beforeAll() throws IOException { @@ -74,26 +73,34 @@ public class PrepareAffiliationRelationsTest { @Test void testMatch() throws Exception { - String crossrefAffiliationRelationPath = getClass() + String crossrefAffiliationRelationPathNew = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json") .getPath(); + String crossrefAffiliationRelationPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror_old.json") + .getPath(); + String publisherAffiliationRelationPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/publishers") .getPath(); + String publisherAffiliationRelationOldPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/publichers_old") + .getPath(); + String outputPath = workingDir.toString() + "/actionSet"; PrepareAffiliationRelations .main( new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-crossrefInputPath", crossrefAffiliationRelationPath, + "-crossrefInputPath", crossrefAffiliationRelationPathNew, "-pubmedInputPath", crossrefAffiliationRelationPath, - "-openapcInputPath", crossrefAffiliationRelationPath, + "-openapcInputPath", crossrefAffiliationRelationPathNew, "-dataciteInputPath", crossrefAffiliationRelationPath, "-webCrawlInputPath", crossrefAffiliationRelationPath, - "-publisherInputPath", publisherAffiliationRelationPath, + "-publisherInputPath", publisherAffiliationRelationOldPath, "-outputPath", outputPath }); @@ -104,13 +111,8 @@ public class PrepareAffiliationRelationsTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); -// for (Relation r : tmp.collect()) { -// System.out.println( -// r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred() -// ); -// } // count the number of relations - assertEquals(138, tmp.count()); + assertEquals(150, tmp.count());// 18 + 24 *3 + 30 * 2 = Dataset dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); dataset.createOrReplaceTempView("result"); @@ -121,7 +123,7 @@ public class PrepareAffiliationRelationsTest { // verify that we have equal number of bi-directional relations Assertions .assertEquals( - 69, execVerification + 75, execVerification .filter( "relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'") .collectAsList() @@ -129,21 +131,21 @@ public class PrepareAffiliationRelationsTest { Assertions .assertEquals( - 69, execVerification + 75, execVerification .filter( "relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'") .collectAsList() .size()); // check confidence value of a specific relation - String sourceDOI = "10.1061/(asce)0733-9399(2002)128:7(759)"; + String sourceDOI = "10.1089/10872910260066679"; final String sourceOpenaireId = ID_PREFIX - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", sourceDOI)); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", sourceDOI)); Assertions .assertEquals( - "0.7071067812", execVerification + "1.0", execVerification .filter( "source='" + sourceOpenaireId + "'") .collectAsList() @@ -151,11 +153,34 @@ public class PrepareAffiliationRelationsTest { .getString(4)); final String publisherid = ID_PREFIX - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s00217-010-1268-9")); - final String rorId = "20|ror_________::" + IdentifierFactory.md5("https://ror.org/03265fv13"); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1089/10872910260066679")); + final String rorId = "20|ror_________::" + IdentifierFactory.md5("https://ror.org/05cf8a891"); Assertions .assertEquals( - 1, execVerification.filter("source = '" + publisherid + "' and target = '" + rorId + "'").count()); + 2, execVerification.filter("source = '" + publisherid + "' and target = '" + rorId + "'").count()); + + Assertions + .assertEquals( + 1, execVerification + .filter( + "source = '" + ID_PREFIX + + IdentifierFactory + .md5(PidCleaner.normalizePidValue("doi", "10.1007/s00217-010-1268-9")) + + "' and target = '" + "20|ror_________::" + + IdentifierFactory.md5("https://ror.org/03265fv13") + "'") + .count()); + + Assertions + .assertEquals( + 3, execVerification + .filter( + "source = '" + ID_PREFIX + + IdentifierFactory + .md5(PidCleaner.normalizePidValue("doi", "10.1007/3-540-47984-8_14")) + + "' and target = '" + "20|ror_________::" + + IdentifierFactory.md5("https://ror.org/00a0n9e72") + "'") + .count()); + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java index ed80ed5c5..b690b6228 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java @@ -31,6 +31,7 @@ import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; public class CreateOpenCitationsASTest { @@ -280,17 +281,17 @@ public class CreateOpenCitationsASTest { @Test void testRelationsSourceTargetCouple() throws Exception { final String doi1 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-015-3684-x")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1007/s10854-015-3684-x")); final String doi2 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/j.1551-2916.2008.02408.x")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1111/j.1551-2916.2008.02408.x")); final String doi3 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-014-2114-9")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1007/s10854-014-2114-9")); final String doi4 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/j.ceramint.2013.09.069")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1016/j.ceramint.2013.09.069")); final String doi5 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-009-9913-4")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1007/s10854-009-9913-4")); final String doi6 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/0038-1098(72)90370-5")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1016/0038-1098(72)90370-5")); String inputPath = getClass() .getResource( diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateTAActionSetTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateTAActionSetTest.java index 0ec34311f..592c6283a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateTAActionSetTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/transformativeagreement/CreateTAActionSetTest.java @@ -28,6 +28,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; /** * @author miriam.baglioni @@ -270,17 +271,17 @@ public class CreateTAActionSetTest { @Test void testRelationsSourceTargetCouple() throws Exception { final String doi1 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-015-3684-x")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1007/s10854-015-3684-x")); final String doi2 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/j.1551-2916.2008.02408.x")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1111/j.1551-2916.2008.02408.x")); final String doi3 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-014-2114-9")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1007/s10854-014-2114-9")); final String doi4 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/j.ceramint.2013.09.069")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1016/j.ceramint.2013.09.069")); final String doi5 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-009-9913-4")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1007/s10854-009-9913-4")); final String doi6 = "50|doi_________::" - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/0038-1098(72)90370-5")); + + IdentifierFactory.md5(PidCleaner.normalizePidValue("doi", "10.1016/0038-1098(72)90370-5")); String inputPath = getClass() .getResource( diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json index 08dc3f7eb..b5a711694 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json @@ -1,9 +1,10 @@ -{"DOI":"10.1061\/(asce)0733-9399(2002)128:7(759)","Matchings":[{"RORid":"https:\/\/ror.org\/03yxnpp24","Confidence":0.7071067812},{"RORid":"https:\/\/ror.org\/01teme464","Confidence":0.89}]} -{"DOI":"10.1105\/tpc.8.3.343","Matchings":[{"RORid":"https:\/\/ror.org\/02k40bc56","Confidence":0.7071067812}]} -{"DOI":"10.1161\/01.cir.0000013305.01850.37","Matchings":[{"RORid":"https:\/\/ror.org\/00qjgza05","Confidence":1}]} -{"DOI":"10.1142\/s021821650200186x","Matchings":[{"RORid":"https:\/\/ror.org\/035xkbk20","Confidence":1},{"RORid":"https:\/\/ror.org\/05apxxy63","Confidence":1}]} -{"DOI":"10.1061\/(asce)0733-9372(2002)128:7(575)","Matchings":[{"RORid":"https:\/\/ror.org\/04j198w64","Confidence":0.82}]} -{"DOI":"10.1061\/(asce)0733-9372(2002)128:7(588)","Matchings":[{"RORid":"https:\/\/ror.org\/03m8km719","Confidence":0.8660254038},{"RORid":"https:\/\/ror.org\/02aze4h65","Confidence":0.87}]} -{"DOI":"10.1161\/hy0202.103001","Matchings":[{"RORid":"https:\/\/ror.org\/057xtrt18","Confidence":0.7071067812}]} -{"DOI": "10.1080/13669877.2015.1042504", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/03265fv13"}]} -{"DOI": "10.1007/3-540-47984-8_14", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/00a0n9e72"}]} \ No newline at end of file +{"DOI":"10.1021\/ac020069k","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/01f5ytq51","Status":"active","Confidence":1}]} +{"DOI":"10.1161\/01.cir.0000013846.72805.7e","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/02pttbw34","Status":"active","Confidence":1}]} +{"DOI":"10.1161\/hy02t2.102992","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/00qqv6244","Status":"active","Confidence":1},{"PID":"ROR","Value":"https:\/\/ror.org\/00p991c53","Status":"active","Confidence":1}]} +{"DOI":"10.1126\/science.1073633","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/03xez1567","Status":"active","Confidence":1},{"PID":"ROR","Value":"https:\/\/ror.org\/006w34k90","Status":"active","Confidence":1}]} +{"DOI":"10.1089\/10872910260066679","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/05cf8a891","Status":"active","Confidence":1}]} +{"DOI":"10.1108\/02656719610116117","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/03mnm0t94","Status":"active","Confidence":1},{"PID":"ROR","Value":"https:\/\/ror.org\/007tn5k56","Status":"active","Confidence":1}]} +{"DOI":"10.1080\/01443610050111986","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/001x4vz59","Status":"active","Confidence":1},{"PID":"ROR","Value":"https:\/\/ror.org\/01tmqtf75","Status":"active","Confidence":1}]} +{"DOI":"10.1021\/cm020118+","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/02cf1je33","Confidence":1,"Status":"inactive"},{"PID":"ROR","Value":"https:\/\/ror.org\/01hvx5h04","Confidence":1,"Status":"active"}]} +{"DOI":"10.1161\/hc1202.104524","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/040r8fr65","Status":"active","Confidence":1},{"PID":"ROR","Value":"https:\/\/ror.org\/04fctr677","Status":"active","Confidence":1}]} +{"DOI":"10.1021\/ma011134f","Matchings":[{"PID":"ROR","Value":"https:\/\/ror.org\/04tj63d06","Status":"active","Confidence":1}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror_old.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror_old.json new file mode 100644 index 000000000..d7f004deb --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror_old.json @@ -0,0 +1,9 @@ +{"DOI":"10.1061\/(asce)0733-9399(2002)128:7(759)","Matchings":[{"RORid":"https:\/\/ror.org\/03yxnpp24","Confidence":0.7071067812},{"RORid":"https:\/\/ror.org\/01teme464","Confidence":0.89}]} +{"DOI":"10.1105\/tpc.8.3.343","Matchings":[{"RORid":"https:\/\/ror.org\/02k40bc56","Confidence":0.7071067812}]} +{"DOI":"10.1161\/01.cir.0000013305.01850.37","Matchings":[{"RORid":"https:\/\/ror.org\/00qjgza05","Confidence":1}]} +{"DOI":"10.1142\/s021821650200186x","Matchings":[{"RORid":"https:\/\/ror.org\/035xkbk20","Confidence":1},{"RORid":"https:\/\/ror.org\/05apxxy63","Confidence":1}]} +{"DOI":"10.1061\/(asce)0733-9372(2002)128:7(575)","Matchings":[{"RORid":"https:\/\/ror.org\/04j198w64","Confidence":0.82}]} +{"DOI":"10.1061\/(asce)0733-9372(2002)128:7(588)","Matchings":[{"RORid":"https:\/\/ror.org\/03m8km719","Confidence":0.8660254038},{"RORid":"https:\/\/ror.org\/02aze4h65","Confidence":0.87}]} +{"DOI":"10.1161\/hy0202.103001","Matchings":[{"RORid":"https:\/\/ror.org\/057xtrt18","Confidence":0.7071067812}]} +{"DOI": "10.1080/13669877.2015.1042504", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/03265fv13"}]} +{"DOI": "https://doi.org/10.1007/3-540-47984-8_14", "Matchings": [{"Confidence": 1.0, "RORid": "https://ror.org/00a0n9e72"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/publichers_old/publisher b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/publichers_old/publisher new file mode 100644 index 000000000..851263933 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/publichers_old/publisher @@ -0,0 +1,6 @@ +{"DOI": "10.1007/s00217-010-1268-9", "Authors": [{"Name": {"Full": "Martin Zarnkow", "First": null, "Last": null}, "Raw_affiliations": ["TU M\u00fcnchen, Lehrstuhl f\u00fcr Brau- und Getr\u00e4nketechnologie"], "Organization_PIDs": []}, {"Name": {"Full": "Andrea Faltermaier", "First": null, "Last": null}, "Raw_affiliations": ["Lehrstuhl f\u00fcr Brau- und Getr\u00e4nketechnologie"], "Organization_PIDs": []}, {"Name": {"Full": "Werner Back", "First": null, "Last": null}, "Raw_affiliations": ["Lehrstuhl f\u00fcr Technologie der Brauerei I"], "Organization_PIDs": []}, {"Name": {"Full": "Martina Gastl", "First": null, "Last": null}, "Raw_affiliations": ["Lehrstuhl f\u00fcr Brau- und Getr\u00e4nketechnologie"], "Organization_PIDs": []}, {"Name": {"Full": "Elkek K. Arendt", "First": null, "Last": null}, "Raw_affiliations": ["University College Cork"], "Organization_PIDs": [{"RORid": "https://ror.org/03265fv13", "Confidence": 1}]}], "Organizations": [{"RORid": "https://ror.org/03265fv13", "Confidence": 1}]} +{"DOI": "10.1007/BF01154707", "Authors": [{"Name": {"Full": "Buggy, M.", "First": null, "Last": null}, "Raw_affiliations": ["Department of Materials Science and Technology, University of Limerick, Limerick, Ireland"], "Organization_PIDs": [{"RORid": "https://ror.org/00a0n9e72", "Confidence": 1}]}, {"Name": {"Full": "Carew, A.", "First": null, "Last": null}, "Raw_affiliations": ["Department of Materials Science and Technology, University of Limerick, Limerick, Ireland"], "Organization_PIDs": [{"RORid": "https://ror.org/00a0n9e72", "Confidence": 1}]}], "Organizations": [{"RORid": "https://ror.org/00a0n9e72", "Confidence": 1}]} +{"DOI": "10.1007/s10237-017-0974-7", "Authors": [{"Name": {"Full": "Donnacha J. McGrath", "First": null, "Last": null}, "Raw_affiliations": ["Biomechanics Research Centre (BMEC), Biomedical Engineering, College of Engineering and Informatics, NUI Galway, Galway, Ireland"], "Organization_PIDs": [{"RORid": "https://ror.org/03bea9k73", "Confidence": 1}]}, {"Name": {"Full": "Anja Lena Thiebes", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biohybrid and Medical Textiles (BioTex), AME-Helmholtz Institute for Biomedical Engineering, ITA-Institut f\u00fcr Textiltechnik, RWTH Aachen University and at AMIBM Maastricht University, Maastricht, The Netherlands, Aachen, Germany"], "Organization_PIDs": [{"RORid": "https://ror.org/02jz4aj89", "Confidence": 0.82}, {"RORid": "https://ror.org/04xfq0f34", "Confidence": 0.87}]}, {"Name": {"Full": "Christian G. Cornelissen", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biohybrid and Medical Textiles (BioTex), AME-Helmholtz Institute for Biomedical Engineering, ITA-Institut f\u00fcr Textiltechnik, RWTH Aachen University and at AMIBM Maastricht University, Maastricht, The Netherlands, Aachen, Germany"], "Organization_PIDs": [{"RORid": "https://ror.org/02jz4aj89", "Confidence": 0.82}, {"RORid": "https://ror.org/04xfq0f34", "Confidence": 0.87}]}, {"Name": {"Full": "Barry O\u2019Brien", "First": null, "Last": null}, "Raw_affiliations": ["Department for Internal Medicine \u2013 Section for Pneumology, Medical Faculty, RWTH Aachen University, Aachen, Germany"], "Organization_PIDs": [{"RORid": "https://ror.org/04xfq0f34", "Confidence": 1}]}, {"Name": {"Full": "Stefan Jockenhoevel", "First": null, "Last": null}, "Raw_affiliations": ["Biomechanics Research Centre (BMEC), Biomedical Engineering, College of Engineering and Informatics, NUI Galway, Galway, Ireland"], "Organization_PIDs": [{"RORid": "https://ror.org/03bea9k73", "Confidence": 1}]}, {"Name": {"Full": "Mark Bruzzi", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biohybrid and Medical Textiles (BioTex), AME-Helmholtz Institute for Biomedical Engineering, ITA-Institut f\u00fcr Textiltechnik, RWTH Aachen University and at AMIBM Maastricht University, Maastricht, The Netherlands, Aachen, Germany"], "Organization_PIDs": [{"RORid": "https://ror.org/02jz4aj89", "Confidence": 0.82}, {"RORid": "https://ror.org/04xfq0f34", "Confidence": 0.87}]}, {"Name": {"Full": "Peter E. McHugh", "First": null, "Last": null}, "Raw_affiliations": ["Biomechanics Research Centre (BMEC), Biomedical Engineering, College of Engineering and Informatics, NUI Galway, Galway, Ireland"], "Organization_PIDs": [{"RORid": "https://ror.org/03bea9k73", "Confidence": 1}]}], "Organizations": [{"RORid": "https://ror.org/03bea9k73", "Confidence": 1}, {"RORid": "https://ror.org/02jz4aj89", "Confidence": 0.82}, {"RORid": "https://ror.org/04xfq0f34", "Confidence": 0.87}, {"RORid": "https://ror.org/04xfq0f34", "Confidence": 1}]} +{"DOI": "10.1007/BF03168973", "Authors": [{"Name": {"Full": "Sheehan, G.", "First": null, "Last": null}, "Raw_affiliations": ["Dept of Infectious Diseases, Mater Misercordiae Hospital, Dublin 7"], "Organization_PIDs": []}, {"Name": {"Full": "Chew, N.", "First": null, "Last": null}, "Raw_affiliations": ["Dept of Infectious Diseases, Mater Misercordiae Hospital, Dublin 7"], "Organization_PIDs": []}], "Organizations": []} +{"DOI": "10.1007/s00338-009-0480-1", "Authors": [{"Name": {"Full": "Gleason, D. F.", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biology, Georgia Southern University, Statesboro, USA"], "Organization_PIDs": [{"RORid": "https://ror.org/04agmb972", "Confidence": 1}]}, {"Name": {"Full": "Danilowicz, B. S.", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biology, Georgia Southern University, Statesboro, USA"], "Organization_PIDs": [{"RORid": "https://ror.org/04agmb972", "Confidence": 1}]}, {"Name": {"Full": "Nolan, C. J.", "First": null, "Last": null}, "Raw_affiliations": ["School of Biology and Environmental Science, University College Dublin, Dublin 4, Ireland"], "Organization_PIDs": [{"RORid": "https://ror.org/05m7pjf47", "Confidence": 1}]}], "Organizations": [{"RORid": "https://ror.org/04agmb972", "Confidence": 1}, {"RORid": "https://ror.org/05m7pjf47", "Confidence": 1}]} +{"DOI": "10.1007/s10993-010-9187-y", "Authors": [{"Name": {"Full": "Martin Howard", "First": null, "Last": null}, "Raw_affiliations": ["University College Cork"], "Organization_PIDs": [{"RORid": "https://ror.org/03265fv13", "Confidence": 1}]}], "Organizations": [{"RORid": "https://ror.org/03265fv13", "Confidence": 1}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/publishers/publisher b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/publishers/publisher new file mode 100644 index 000000000..426500e73 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/publishers/publisher @@ -0,0 +1,6 @@ +{"DOI": "10.1007/s00217-010-1268-9", "Authors": [{"Name": {"Full": "Martin Zarnkow", "First": null, "Last": null}, "Raw_affiliations": ["TU M\u00fcnchen, Lehrstuhl f\u00fcr Brau- und Getr\u00e4nketechnologie"], "Organization_PIDs": []}, {"Name": {"Full": "Andrea Faltermaier", "First": null, "Last": null}, "Raw_affiliations": ["Lehrstuhl f\u00fcr Brau- und Getr\u00e4nketechnologie"], "Organization_PIDs": []}, {"Name": {"Full": "Werner Back", "First": null, "Last": null}, "Raw_affiliations": ["Lehrstuhl f\u00fcr Technologie der Brauerei I"], "Organization_PIDs": []}, {"Name": {"Full": "Martina Gastl", "First": null, "Last": null}, "Raw_affiliations": ["Lehrstuhl f\u00fcr Brau- und Getr\u00e4nketechnologie"], "Organization_PIDs": []}, {"Name": {"Full": "Elkek K. Arendt", "First": null, "Last": null}, "Raw_affiliations": ["University College Cork"], "Organization_PIDs": [{"Value": "https://ror.org/03265fv13", "Confidence": 1}]}], "Organizations": [{"Provenance":"AffRo","PID":"ROR","Status":"active","Value": "https://ror.org/03265fv13", "Confidence": 1}]} +{"DOI": "10.1007/BF01154707", "Authors": [{"Name": {"Full": "Buggy, M.", "First": null, "Last": null}, "Raw_affiliations": ["Department of Materials Science and Technology, University of Limerick, Limerick, Ireland"], "Organization_PIDs": [{"Value": "https://ror.org/00a0n9e72", "Confidence": 1}]}, {"Name": {"Full": "Carew, A.", "First": null, "Last": null}, "Raw_affiliations": ["Department of Materials Science and Technology, University of Limerick, Limerick, Ireland"], "Organization_PIDs": [{"Value": "https://ror.org/00a0n9e72", "Confidence": 1}]}], "Organizations": [{"Provenance":"AffRo","PID":"ROR","Status":"active","Value": "https://ror.org/00a0n9e72", "Confidence": 1}]} +{"DOI": "10.1007/s10237-017-0974-7", "Authors": [{"Name": {"Full": "Donnacha J. McGrath", "First": null, "Last": null}, "Raw_affiliations": ["Biomechanics Research Centre (BMEC), Biomedical Engineering, College of Engineering and Informatics, NUI Galway, Galway, Ireland"], "Organization_PIDs": [{"Value": "https://ror.org/03bea9k73", "Confidence": 1}]}, {"Name": {"Full": "Anja Lena Thiebes", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biohybrid and Medical Textiles (BioTex), AME-Helmholtz Institute for Biomedical Engineering, ITA-Institut f\u00fcr Textiltechnik, RWTH Aachen University and at AMIBM Maastricht University, Maastricht, The Netherlands, Aachen, Germany"], "Organization_PIDs": [{"Value": "https://ror.org/02jz4aj89", "Confidence": 0.82}, {"Value": "https://ror.org/04xfq0f34", "Confidence": 0.87}]}, {"Name": {"Full": "Christian G. Cornelissen", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biohybrid and Medical Textiles (BioTex), AME-Helmholtz Institute for Biomedical Engineering, ITA-Institut f\u00fcr Textiltechnik, RWTH Aachen University and at AMIBM Maastricht University, Maastricht, The Netherlands, Aachen, Germany"], "Organization_PIDs": [{"Value": "https://ror.org/02jz4aj89", "Confidence": 0.82}, {"Value": "https://ror.org/04xfq0f34", "Confidence": 0.87}]}, {"Name": {"Full": "Barry O\u2019Brien", "First": null, "Last": null}, "Raw_affiliations": ["Department for Internal Medicine \u2013 Section for Pneumology, Medical Faculty, RWTH Aachen University, Aachen, Germany"], "Organization_PIDs": [{"Value": "https://ror.org/04xfq0f34", "Confidence": 1}]}, {"Name": {"Full": "Stefan Jockenhoevel", "First": null, "Last": null}, "Raw_affiliations": ["Biomechanics Research Centre (BMEC), Biomedical Engineering, College of Engineering and Informatics, NUI Galway, Galway, Ireland"], "Organization_PIDs": [{"Value": "https://ror.org/03bea9k73", "Confidence": 1}]}, {"Name": {"Full": "Mark Bruzzi", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biohybrid and Medical Textiles (BioTex), AME-Helmholtz Institute for Biomedical Engineering, ITA-Institut f\u00fcr Textiltechnik, RWTH Aachen University and at AMIBM Maastricht University, Maastricht, The Netherlands, Aachen, Germany"], "Organization_PIDs": [{"Value": "https://ror.org/02jz4aj89", "Confidence": 0.82}, {"Value": "https://ror.org/04xfq0f34", "Confidence": 0.87}]}, {"Name": {"Full": "Peter E. McHugh", "First": null, "Last": null}, "Raw_affiliations": ["Biomechanics Research Centre (BMEC), Biomedical Engineering, College of Engineering and Informatics, NUI Galway, Galway, Ireland"], "Organization_PIDs": [{"Value": "https://ror.org/03bea9k73", "Confidence": 1}]}], "Organizations": [{"Provenance":"AffRo","PID":"ROR","Status":"active","Value": "https://ror.org/03bea9k73", "Confidence": 1}, {"Provenance":"AffRo","PID":"ROR","Status":"active","Value": "https://ror.org/02jz4aj89", "Confidence": 0.82}, {"Provenance":"AffRo","PID":"ROR","Status":"active","Value": "https://ror.org/04xfq0f34", "Confidence": 0.87}, {"Provenance":"AffRo","PID":"ROR","Status":"active","Value": "https://ror.org/04xfq0f34", "Confidence": 1}]} +{"DOI": "10.1007/BF03168973", "Authors": [{"Name": {"Full": "Sheehan, G.", "First": null, "Last": null}, "Raw_affiliations": ["Dept of Infectious Diseases, Mater Misercordiae Hospital, Dublin 7"], "Organization_PIDs": []}, {"Name": {"Full": "Chew, N.", "First": null, "Last": null}, "Raw_affiliations": ["Dept of Infectious Diseases, Mater Misercordiae Hospital, Dublin 7"], "Organization_PIDs": []}], "Organizations": []} +{"DOI": "10.1007/s00338-009-0480-1", "Authors": [{"Name": {"Full": "Gleason, D. F.", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biology, Georgia Southern University, Statesboro, USA"], "Organization_PIDs": [{"Value": "https://ror.org/04agmb972", "Confidence": 1}]}, {"Name": {"Full": "Danilowicz, B. S.", "First": null, "Last": null}, "Raw_affiliations": ["Department of Biology, Georgia Southern University, Statesboro, USA"], "Organization_PIDs": [{"Value": "https://ror.org/04agmb972", "Confidence": 1}]}, {"Name": {"Full": "Nolan, C. J.", "First": null, "Last": null}, "Raw_affiliations": ["School of Biology and Environmental Science, University College Dublin, Dublin 4, Ireland"], "Organization_PIDs": [{"Value": "https://ror.org/05m7pjf47", "Confidence": 1}]}], "Organizations": [{"Provenance":"AffRo","PID":"ROR","Status":"active","Value": "https://ror.org/04agmb972", "Confidence": 1}, {"Provenance":"AffRo","PID":"ROR","Status":"active","Value": "https://ror.org/05m7pjf47", "Confidence": 1}]} +{"DOI": "10.1007/s10993-010-9187-y", "Authors": [{"Name": {"Full": "Martin Howard", "First": null, "Last": null}, "Raw_affiliations": ["University College Cork"], "Organization_PIDs": [{"Value": "https://ror.org/03265fv13", "Confidence": 1}]}], "Organizations": [{"PID":"ROR","Status":"active","Value": "https://ror.org/03265fv13", "Confidence": 1}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala index 185381f8f..ba3c92794 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -313,7 +313,7 @@ case object ConversionUtil { if (f.author.DisplayName.isDefined) a.setFullname(f.author.DisplayName.get) if (f.affiliation != null) - a.setAffiliation(List(asField(f.affiliation)).asJava) + a.setRawAffiliationString(List(f.affiliation).asJava) a.setPid( List( createSP( @@ -386,7 +386,7 @@ case object ConversionUtil { a.setFullname(f.author.DisplayName.get) if (f.affiliation != null) - a.setAffiliation(List(asField(f.affiliation)).asJava) + a.setRawAffiliationString(List(f.affiliation).asJava) a.setPid( List( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java index 76e1d57a1..73243dbc5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java @@ -9,10 +9,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,8 +22,6 @@ public class GraphHiveTableImporterJob { private static final Logger log = LoggerFactory.getLogger(GraphHiveTableImporterJob.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -74,7 +69,12 @@ public class GraphHiveTableImporterJob { private static void loadGraphTable(SparkSession spark, String inputPath, String hiveDbName, Class clazz, int numPartitions) { - Dataset dataset = spark.read().textFile(inputPath); + final Encoder clazzEncoder = Encoders.bean(clazz); + + Dataset dataset = spark + .read() + .schema(clazzEncoder.schema()) + .json(inputPath); if (numPartitions > 0) { log.info("repartitioning {} to {} partitions", clazz.getSimpleName(), numPartitions); @@ -82,7 +82,6 @@ public class GraphHiveTableImporterJob { } dataset - .map((MapFunction) s -> OBJECT_MAPPER.readValue(s, clazz), Encoders.bean(clazz)) .write() .mode(SaveMode.Overwrite) .saveAsTable(tableIdentifier(hiveDbName, clazz)); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index c95d5442a..a85f47d99 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -55,29 +55,7 @@ import eu.dnetlib.dhp.common.Constants; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.AccessRight; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Context; -import eu.dnetlib.dhp.schema.oaf.Country; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.EoscIfGuidelines; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.GeoLocation; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.InstanceTypeMapping; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.OAIProvenance; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.schema.oaf.Subject; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @@ -667,22 +645,25 @@ public abstract class AbstractMdRecordToOafMapper { return this.vocs.getTermAsQualifier(schemeId, classId); } - protected List prepareListStructPropsWithValidQualifier( + protected List prepareListStructPropsWithValidQualifier( final Node node, final String xpath, final String xpathClassId, final String schemeId, final DataInfo info) { - final List res = new ArrayList<>(); + final Set res = new HashSet<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; final String classId = n.valueOf(xpathClassId).trim(); if (this.vocs.termExists(schemeId, classId)) { - res.add(structuredProperty(n.getText(), this.vocs.getTermAsQualifier(schemeId, classId), info)); + res + .add( + HashableStructuredProperty + .newInstance(n.getText(), this.vocs.getTermAsQualifier(schemeId, classId), info)); } } - return res; + return Lists.newArrayList(res); } protected List prepareListStructProps( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index eee518353..98da48f9e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -25,6 +25,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; +import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; public class OafToOafMapper extends AbstractMdRecordToOafMapper { @@ -380,7 +381,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { return prepareListStructPropsWithValidQualifier( doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info) .stream() - .map(CleaningFunctions::normalizePidValue) + .map(PidCleaner::normalizePidValue) .collect(Collectors.toList()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 57e0d2955..ad61304a0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -24,6 +24,7 @@ import eu.dnetlib.dhp.schema.common.RelationInverse; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @@ -93,7 +94,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { author.setFullname(String.format("%s, %s", author.getSurname(), author.getName())); } - author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info)); + author.setRawAffiliationString(prepareListString(n, "./*[local-name()='affiliation']")); author.setPid(preparePids(n, info)); author.setRank(pos++); res.add(author); @@ -504,7 +505,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - final Set res = new HashSet<>(); + final Set res = new HashSet<>(); res .addAll( prepareListStructPropsWithValidQualifier( @@ -524,7 +525,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { return res .stream() - .map(CleaningFunctions::normalizePidValue) + .map(PidCleaner::normalizePidValue) + .filter(CleaningFunctions::pidFilter) .collect(Collectors.toList()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java index afaac04ea..e059cbc86 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java @@ -73,14 +73,10 @@ public class GraphHiveImporterJobTest { GraphHiveImporterJob .main( new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-inputPath", - getClass().getResource("/eu/dnetlib/dhp/oa/graph/sample").getPath(), - "-hiveMetastoreUris", - "", - "-hiveDbName", - dbName + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", getClass().getResource("/eu/dnetlib/dhp/oa/graph/sample").getPath(), + "--hiveMetastoreUris", "", + "--hiveDbName", dbName }); ModelSupport.oafTypes diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJobTest.java index 4ae3f82c2..23de57204 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJobTest.java @@ -388,7 +388,7 @@ public class CleanGraphSparkJobTest { .collect(Collectors.toList()); assertNotNull(fos_subjects); - assertEquals(2, fos_subjects.size()); + assertEquals(3, fos_subjects.size()); assertTrue( fos_subjects @@ -396,18 +396,10 @@ public class CleanGraphSparkJobTest { .anyMatch( s -> "0101 mathematics".equals(s.getValue()) & ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()) & - "sysimport:crosswalk:datasetarchive" - .equals(s.getDataInfo().getProvenanceaction().getClassid()))); + "subject:fos".equals(s.getDataInfo().getProvenanceaction().getClassid()))); - assertTrue( - fos_subjects - .stream() - .anyMatch( - s -> "0102 computer and information sciences".equals(s.getValue()) & - ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()))); - - verify_keyword(p, "In Situ Hybridization"); - verify_keyword(p, "Avicennia"); + verify_keyword(p, "FOS: Mathematics"); + verify_keyword(p, "FOS: Computer and information sciences"); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index 8d10508a9..cdc052dfa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -266,7 +266,7 @@ public class GraphCleaningFunctionsTest { .collect(Collectors.toList()); assertNotNull(fos_subjects); - assertEquals(2, fos_subjects.size()); + assertEquals(3, fos_subjects.size()); assertTrue( fos_subjects @@ -274,18 +274,18 @@ public class GraphCleaningFunctionsTest { .anyMatch( s -> "0101 mathematics".equals(s.getValue()) & ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()) & - "sysimport:crosswalk:datasetarchive" - .equals(s.getDataInfo().getProvenanceaction().getClassid()))); + "subject:fos".equals(s.getDataInfo().getProvenanceaction().getClassid()))); assertTrue( fos_subjects .stream() .anyMatch( s -> "0102 computer and information sciences".equals(s.getValue()) & - ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()))); + ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()) & + "subject:fos".equals(s.getDataInfo().getProvenanceaction().getClassid()))); - verify_keyword(p_cleaned, "In Situ Hybridization"); - verify_keyword(p_cleaned, "Avicennia"); + verify_keyword(p_cleaned, "FOS: Computer and information sciences"); + verify_keyword(p_cleaned, "FOS: Mathematics"); // TODO add more assertions to verity the cleaned values System.out.println(MAPPER.writeValueAsString(p_cleaned)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 6ec2f1d51..612e1d0b6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -44,7 +44,7 @@ class GenerateEntitiesApplicationTest { } @Test - void testMergeResult() throws IOException, DocumentException { + void testMergeResult() throws IOException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); @@ -69,15 +69,15 @@ class GenerateEntitiesApplicationTest { verifyMerge(orp, software, Software.class, ModelConstants.SOFTWARE_RESULTTYPE_CLASSID); } - protected void verifyMerge(Result publication, Result dataset, Class clazz, + protected void verifyMerge(Result r1, Result r2, Class clazz, String resultType) { - final Result merge = (Result) MergeUtils.merge(publication, dataset); + final Result merge = MergeUtils.checkedMerge(r1, r2, true); assertTrue(clazz.isAssignableFrom(merge.getClass())); assertEquals(resultType, merge.getResulttype().getClassid()); } protected Result getResult(String xmlFileName, Class clazz) - throws IOException, DocumentException { + throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName)); return new OdfToOafMapper(vocs, false, true) .processMdRecord(xml) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 4fb8f86f7..2cf3ea0c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -216,7 +216,7 @@ class MappersTest { } @Test - void testPublication_PubMed() throws IOException, DocumentException { + void testPublication_PubMed() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); @@ -264,8 +264,17 @@ class MappersTest { assertFalse(p.getSubject().isEmpty()); assertFalse(p.getPid().isEmpty()); - assertEquals("PMC1517292", p.getPid().get(0).getValue()); - assertEquals("pmc", p.getPid().get(0).getQualifier().getClassid()); + + assertTrue(p.getPid().stream().anyMatch(pi -> "pmc".equals(pi.getQualifier().getClassid()))); + assertEquals( + "PMC1517292", + p + .getPid() + .stream() + .filter(pi -> "pmc".equals(pi.getQualifier().getClassid())) + .findFirst() + .get() + .getValue()); assertNotNull(p.getInstance()); assertFalse(p.getInstance().isEmpty()); @@ -292,7 +301,7 @@ class MappersTest { } @Test - void testPublicationInvisible() throws IOException, DocumentException { + void testPublicationInvisible() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); @@ -307,6 +316,25 @@ class MappersTest { } + @Test + void testPublicationInvisible_BASE() throws IOException { + + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record_base.xml"))); + + final List list = new OdfToOafMapper(vocs, true, true).processMdRecord(xml); + + assertFalse(list.isEmpty()); + assertTrue(list.get(0) instanceof Publication); + + final Publication p = (Publication) list.get(0); + + assertTrue(p.getDataInfo().getInvisible()); + + System.out.println(new ObjectMapper().writeValueAsString(p)); + + } + @Test void testOdfFwfEBookLibrary() throws IOException { final String xml = IOUtils @@ -318,7 +346,7 @@ class MappersTest { } @Test - void testDataset() throws IOException, DocumentException { + void testDataset() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -332,19 +360,19 @@ class MappersTest { final Relation r1 = (Relation) list.get(1); final Relation r2 = (Relation) list.get(2); - assertEquals(d.getId(), r1.getSource()); - assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r1.getTarget()); + assertEquals(d.getId(), r1.getTarget()); + assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r1.getSource()); assertEquals(ModelConstants.RESULT_PROJECT, r1.getRelType()); assertEquals(ModelConstants.OUTCOME, r1.getSubRelType()); - assertEquals(ModelConstants.IS_PRODUCED_BY, r1.getRelClass()); + assertEquals(ModelConstants.PRODUCES, r1.getRelClass()); assertTrue(r1.getValidated()); assertEquals("2020-01-01", r1.getValidationDate()); - assertEquals(d.getId(), r2.getTarget()); - assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r2.getSource()); + assertEquals(d.getId(), r2.getSource()); + assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r2.getTarget()); assertEquals(ModelConstants.RESULT_PROJECT, r2.getRelType()); assertEquals(ModelConstants.OUTCOME, r2.getSubRelType()); - assertEquals(ModelConstants.PRODUCES, r2.getRelClass()); + assertEquals(ModelConstants.IS_PRODUCED_BY, r2.getRelClass()); assertTrue(r2.getValidated()); assertEquals("2020-01-01", r2.getValidationDate()); @@ -378,15 +406,15 @@ class MappersTest { assertEquals("Baracchini", author.get().getSurname()); assertEquals("Theo", author.get().getName()); - assertEquals(1, author.get().getAffiliation().size()); - final Optional> opAff = author + assertEquals(1, author.get().getRawAffiliationString().size()); + final Optional opAff = author .get() - .getAffiliation() + .getRawAffiliationString() .stream() .findFirst(); assertTrue(opAff.isPresent()); - final Field affiliation = opAff.get(); - assertEquals("ISTI-CNR", affiliation.getValue()); + final String affiliation = opAff.get(); + assertEquals("ISTI-CNR", affiliation); assertFalse(d.getSubject().isEmpty()); assertFalse(d.getInstance().isEmpty()); @@ -450,7 +478,7 @@ class MappersTest { } @Test - void testOdfBielefeld() throws IOException, DocumentException { + void testOdfBielefeld() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); @@ -501,7 +529,7 @@ class MappersTest { } @Test - void testOpentrial() throws IOException, DocumentException { + void testOpentrial() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); @@ -741,7 +769,7 @@ class MappersTest { } @Test - void testSoftware() throws IOException, DocumentException { + void testSoftware() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -763,22 +791,21 @@ class MappersTest { final Relation r1 = (Relation) list.get(1); final Relation r2 = (Relation) list.get(2); - assertEquals(s.getId(), r1.getSource()); - assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r1.getTarget()); + assertEquals(s.getId(), r1.getTarget()); + assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r1.getSource()); assertEquals(ModelConstants.RESULT_RESULT, r1.getRelType()); assertEquals(ModelConstants.RELATIONSHIP, r1.getSubRelType()); - assertEquals(ModelConstants.IS_REFERENCED_BY, r1.getRelClass()); + assertEquals(ModelConstants.REFERENCES, r1.getRelClass()); - assertEquals(s.getId(), r2.getTarget()); - assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r2.getSource()); + assertEquals(s.getId(), r2.getSource()); + assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r2.getTarget()); assertEquals(ModelConstants.RESULT_RESULT, r2.getRelType()); assertEquals(ModelConstants.RELATIONSHIP, r2.getSubRelType()); - assertEquals(ModelConstants.REFERENCES, r2.getRelClass()); - + assertEquals(ModelConstants.IS_REFERENCED_BY, r2.getRelClass()); } @Test - void testClaimDedup() throws IOException, DocumentException { + void testClaimDedup() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -792,7 +819,7 @@ class MappersTest { } @Test - void testNakala() throws IOException, DocumentException { + void testNakala() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -820,7 +847,7 @@ class MappersTest { } @Test - void testEnermaps() throws IOException, DocumentException { + void testEnermaps() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -845,7 +872,7 @@ class MappersTest { } @Test - void testClaimFromCrossref() throws IOException, DocumentException { + void testClaimFromCrossref() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -862,7 +889,7 @@ class MappersTest { } @Test - void testODFRecord() throws IOException, DocumentException { + void testODFRecord() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -882,7 +909,7 @@ class MappersTest { } @Test - void testTextGrid() throws IOException, DocumentException { + void testTextGrid() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -916,7 +943,7 @@ class MappersTest { } @Test - void testBologna() throws IOException, DocumentException { + void testBologna() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -933,7 +960,7 @@ class MappersTest { } @Test - void testJairo() throws IOException, DocumentException { + void testJairo() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -971,7 +998,7 @@ class MappersTest { } @Test - void testZenodo() throws IOException, DocumentException { + void testZenodo() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -1016,7 +1043,7 @@ class MappersTest { } @Test - void testOdfFromHdfs() throws IOException, DocumentException { + void testOdfFromHdfs() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); @@ -1065,7 +1092,7 @@ class MappersTest { } @Test - void testXMLEncodedURL() throws IOException, DocumentException { + void testXMLEncodedURL() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -1081,7 +1108,7 @@ class MappersTest { } @Test - void testXMLEncodedURL_ODF() throws IOException, DocumentException { + void testXMLEncodedURL_ODF() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url_odf.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -1245,7 +1272,7 @@ class MappersTest { } @Test - void testRiunet() throws IOException, DocumentException { + void testRiunet() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("riunet.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -1291,7 +1318,7 @@ class MappersTest { } @Test - void testIRISPub() throws IOException, DocumentException { + void testIRISPub() throws IOException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("iris-odf.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/graph/publication/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/graph/publication/publication.json index e2a2b9449..2536934c5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/graph/publication/publication.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/graph/publication/publication.json @@ -1,4 +1,4 @@ -{"id":"50|CSC_________::2250a70c903c6ac6e4c01438259e9375","author":[{"affiliation":[],"fullname":"Brien, Tom","name":"Tom","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID12","classname":"ORCID12","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-9613-6639"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID12","classname":"ORCID12","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"https://orcid.org/0000-0001-9613-6639"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"ORCID12","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-9613-6639"}],"rank":1,"surname":"Brien"},{"affiliation":[],"fullname":"Ade, Peter","name":"Peter","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"xyz","classname":"XYZ","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"qwerty"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"","schemename":""},"value":"asdasd"}],"rank":2,"surname":"Ade"},{"affiliation":[],"fullname":"Barry, Peter S.","name":"Peter S.","pid":null,"rank":3,"surname":"Barry"},{"affiliation":[],"fullname":"Dunscombe, Chris J.","name":"Chris J.","pid":[],"rank":4,"surname":"Dunscombe"},{"affiliation":[],"fullname":"Leadley, David R.","name":"David R.","pid":[],"rank":5,"surname":"Leadley"},{"affiliation":[],"fullname":"Morozov, Dmitry V.","name":"Dmitry V.","pid":[],"rank":6,"surname":"Morozov"},{"affiliation":[],"fullname":"Myronov, Maksym","name":"Maksym","pid":[],"rank":7,"surname":"Myronov"},{"affiliation":[],"fullname":"Parker, Evan","name":"Evan","pid":[],"rank":8,"surname":"Parker"},{"affiliation":[],"fullname":"Prest, Martin J.","name":"Martin J.","pid":[],"rank":9,"surname":"Prest"},{"affiliation":[],"fullname":"Prunnila, Mika","name":"Mika","pid":[],"rank":10,"surname":"Prunnila"},{"affiliation":[],"fullname":"Sudiwala, Rashmi V.","name":"Rashmi V.","pid":[],"rank":11,"surname":"Sudiwala"},{"affiliation":[],"fullname":"Whall, Terry E.","name":"Terry E.","pid":[],"rank":12,"surname":"Whall"},{"affiliation":[],"fullname":" - ","name":"","pid":[],"rank":13,"surname":""},{"affiliation":[],"fullname":" :none","name":"","pid":[],"rank":14,"surname":""}],"bestaccessright":null,"publisher":{"value":null},"collectedfrom":[{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"}],"context":[],"contributor":[],"country":[{"classid":"DE","classname":"DE","schemeid":"dnet:countries","schemename":"dnet:countries"}],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"7 oct 1970"},"dateofcollection":"","dateoftransformation":"2020-04-22T12:34:08.009Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"instance":[{"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1008/abcd"}],"alternateIdentifier":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1009/qwerty"}],"accessright":{"classid":"CLOSED","classname":"CLOSED","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-01-01"},"distributionlocation":"","hostedby":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"instancetype":{"classid":"Comment/debate","classname":"Comment/debate","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"url":["http://juuli.fi/Record/0275158616","http://dx.doi.org/10.1007/s109090161569x","http://academia.edu/abcd","http://repo.scoap3.org/api","http://hdl.handle.net/"]},{"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1002/s21010127267xy"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1008/abcd"}],"alternateIdentifier":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1009/qwerty"}],"accessright":{"classid":"CLOSED","classname":"CLOSED","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-01-01"},"distributionlocation":"","hostedby":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"instancetype":{"classid":"Model","classname":"Model","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"url":["http://dx.doi.org/10.1002/s21010127267xy"]},{"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1002/s21010127267xy"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1008/abcd"}],"alternateIdentifier":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1009/qwerty"}],"accessright":{"classid":"CLOSED","classname":"CLOSED","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-01-01"},"distributionlocation":"","hostedby":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"instancetype":{"classid":"xyz","classname":"xyz","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"url":["http://dx.doi.org/10.1002/t32121238378t"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":" 7","iss":"9 March","issnLinking":"","issnOnline":"","issnPrinted":"0022-2291","name":"Journal of Low Temperature Physics - Early Acces","sp":"1 ","vol":""},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283286319,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif","datestamp":"2019-07-30","harvestDate":"2020-04-22T11:04:38.685Z","identifier":"oai:virta-jtp.csc.fi:Publications/0275158616","metadataNamespace":""}},"originalId":["CSC_________::2250a70c903c6ac6e4c01438259e9375"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"deletedbyinference":false,"inferred":false,"inferenceprovenance":"","invisible":false,"trust":"0.9"},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"In Situ Hybridization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"ta213"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"FOS: Mathematics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"FOS: Computer and information sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"0101 mathematics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"subject:fos","classname":"subject:fos","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"0101 mathematics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"slot antennas"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"strained silicon"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"cold electron bolometers"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Avicennia"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"measure noise"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"noise equivalent power"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"optical characterisation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"optical response"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"photon noise"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"silicon absorbers"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Optical response of strained- and unstrained-silicon cold-electron bolometers test"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"test test 123 test"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"omic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"-"}]} +{"id":"50|CSC_________::2250a70c903c6ac6e4c01438259e9375","author":[{"affiliation":[],"fullname":"Brien, Tom","name":"Tom","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID12","classname":"ORCID12","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-9613-6639"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID12","classname":"ORCID12","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"https://orcid.org/0000-0001-9613-6639"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"ORCID12","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-9613-6639"}],"rank":1,"surname":"Brien"},{"affiliation":[],"fullname":"Ade, Peter","name":"Peter","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"xyz","classname":"XYZ","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"qwerty"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"","schemename":""},"value":"asdasd"}],"rank":2,"surname":"Ade"},{"affiliation":[],"fullname":"Barry, Peter S.","name":"Peter S.","pid":null,"rank":3,"surname":"Barry"},{"affiliation":[],"fullname":"Dunscombe, Chris J.","name":"Chris J.","pid":[],"rank":4,"surname":"Dunscombe"},{"affiliation":[],"fullname":"Leadley, David R.","name":"David R.","pid":[],"rank":5,"surname":"Leadley"},{"affiliation":[],"fullname":"Morozov, Dmitry V.","name":"Dmitry V.","pid":[],"rank":6,"surname":"Morozov"},{"affiliation":[],"fullname":"Myronov, Maksym","name":"Maksym","pid":[],"rank":7,"surname":"Myronov"},{"affiliation":[],"fullname":"Parker, Evan","name":"Evan","pid":[],"rank":8,"surname":"Parker"},{"affiliation":[],"fullname":"Prest, Martin J.","name":"Martin J.","pid":[],"rank":9,"surname":"Prest"},{"affiliation":[],"fullname":"Prunnila, Mika","name":"Mika","pid":[],"rank":10,"surname":"Prunnila"},{"affiliation":[],"fullname":"Sudiwala, Rashmi V.","name":"Rashmi V.","pid":[],"rank":11,"surname":"Sudiwala"},{"affiliation":[],"fullname":"Whall, Terry E.","name":"Terry E.","pid":[],"rank":12,"surname":"Whall"},{"affiliation":[],"fullname":" - ","name":"","pid":[],"rank":13,"surname":""},{"affiliation":[],"fullname":" :none","name":"","pid":[],"rank":14,"surname":""}],"bestaccessright":null,"publisher":{"value":null},"collectedfrom":[{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"}],"context":[],"contributor":[],"country":[{"classid":"DE","classname":"DE","schemeid":"dnet:countries","schemename":"dnet:countries"}],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"7 oct 1970"},"dateofcollection":"","dateoftransformation":"2020-04-22T12:34:08.009Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"instance":[{"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1008/abcd"}],"alternateIdentifier":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1009/qwerty"}],"accessright":{"classid":"CLOSED","classname":"CLOSED","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-01-01"},"distributionlocation":"","hostedby":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"instancetype":{"classid":"Comment/debate","classname":"Comment/debate","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"url":["http://juuli.fi/Record/0275158616","http://dx.doi.org/10.1007/s109090161569x","http://academia.edu/abcd","http://repo.scoap3.org/api","http://hdl.handle.net/"]},{"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1002/s21010127267xy"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1008/abcd"}],"alternateIdentifier":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1009/qwerty"}],"accessright":{"classid":"CLOSED","classname":"CLOSED","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-01-01"},"distributionlocation":"","hostedby":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"instancetype":{"classid":"Model","classname":"Model","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"url":["http://dx.doi.org/10.1002/s21010127267xy"]},{"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1002/s21010127267xy"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1008/abcd"}],"alternateIdentifier":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1009/qwerty"}],"accessright":{"classid":"CLOSED","classname":"CLOSED","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-01-01"},"distributionlocation":"","hostedby":{"key":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","value":"VIRTA"},"instancetype":{"classid":"xyz","classname":"xyz","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"url":["http://dx.doi.org/10.1002/t32121238378t"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":" 7","iss":"9 March","issnLinking":"","issnOnline":"","issnPrinted":"0022-2291","name":"Journal of Low Temperature Physics - Early Acces","sp":"1 ","vol":""},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283286319,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif","datestamp":"2019-07-30","harvestDate":"2020-04-22T11:04:38.685Z","identifier":"oai:virta-jtp.csc.fi:Publications/0275158616","metadataNamespace":""}},"originalId":["CSC_________::2250a70c903c6ac6e4c01438259e9375"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s109090161569x"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"deletedbyinference":false,"inferred":false,"inferenceprovenance":"","invisible":false,"trust":"0.9"},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"In Situ Hybridization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"ta213"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"FOS: Mathematics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"FOS: Computer and information sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"0101 mathematics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"subject:fos","classname":"subject:fos","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"0101 mathematics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"slot antennas"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"strained silicon"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"cold electron bolometers"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"FOS","classname":"Fields of Science and Technology classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Avicennia"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"measure noise"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"noise equivalent power"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"optical characterisation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"optical response"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"photon noise"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"silicon absorbers"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Optical response of strained- and unstrained-silicon cold-electron bolometers test"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"test test 123 test"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"omic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"-"}]} {"id":"50|doi_________::b0baa0eb88a5788f0b8815560d2a32f2","context": [], "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "sysimport:actionset", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.1097/00132586-197308000-00003"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2006-11-06T11:36:37Z"}], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}], "subject": [], "lastupdatetimestamp": 1620353302565, "author": [{"fullname": "N. S. AGRUSS", "surname": "AGRUSS", "name": "N. S.", "rank": 1}, {"fullname": "E. Y. ROSIN", "surname": "ROSIN", "name": "E. Y.", "rank": 2}, {"fullname": "R. J. ADOLPH", "surname": "ADOLPH", "name": "R. J.", "rank": 3}, {"fullname": "N. O. FOWLER", "surname": "FOWLER", "name": "N. O.", "rank": 4}], "instance": [{"hostedby": {"key": "10|issn___print::b8cee613d4f898f8c03956d57ea69be2", "value": "Survey of Anesthesiology"}, "url": ["https://doi.org/10.1097/00132586-197308000-00003"], "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.1097/00132586-197308000-00003"}], "dateofacceptance": {"value": "2006-11-06T11:36:37Z"}, "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2021-05-07T02:08:22Z", "fulltext": [], "description": [], "format": [], "journal": {"issnPrinted": "0039-6206", "vol": "17", "sp": "304", "name": "Survey of Anesthesiology"}, "measures": [], "coverage": [], "externalReference": [], "publisher": {"value": "Ovid Technologies (Wolters Kluwer Health)"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.1097/00132586-197308000-00003", "50|doiboost____::b0baa0eb88a5788f0b8815560d2a32f2"], "source": [{"value": "Crossref"}], "dateofacceptance": {"value": "2006-11-06T11:36:37Z"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "SIGNIFICANCE OF CHRONIC SINUS BRADYCARDIA IN ELDERLY PEOPLE"}]} {"id":"50|doi_________::4972b0ca81b96b225aed8038bb965656","context": [], "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "sysimport:actionset", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.2143/tvg.62.1.5002364"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2007-08-20T08:35:04Z"}, {"qualifier": {"classid": "published-online", "classname": "published-online", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2006-01-01"}], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}], "subject": [{"qualifier": {"classid": "keywords", "classname": "keywords", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "General Medicine"}], "lastupdatetimestamp": 1620381522840, "author": [{"fullname": "null VERHAMME P", "surname": "VERHAMME P", "rank": 1}], "instance": [{"hostedby": {"key": "10|issn__online::7ec728ad1ac65c60cd563a5137111125", "value": "Tijdschrift voor Geneeskunde"}, "url": ["https://doi.org/10.2143/tvg.62.1.5002364"], "pid": [{"qualifier": {"classid": "doi", "classname": "doi", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.2143/tvg.62.1.5002364"}], "dateofacceptance": {"value": "2006-01-01"}, "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2021-05-07T09:58:42Z", "fulltext": [], "description": [], "format": [], "journal": {"vol": "62", "sp": "55", "issnOnline": "0371-683X", "ep": "61", "name": "Tijdschrift voor Geneeskunde"}, "measures": [], "coverage": [], "externalReference": [], "publisher": {"value": "Peeters Publishers"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.2143/tvg.62.1.5002364", "50|doiboost____::4972b0ca81b96b225aed8038bb965656"], "source": [{"value": "Crossref"}], "dateofacceptance": {"value": "2006-01-01"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Antitrombotica: nieuwe moleculen"}]} {"id":"50|DansKnawCris::0224aae28af558f21768dbc6439a_ctx","author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T16:14:18.452Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg., bijl."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"8250"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282676557,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:51:24Z","harvestDate":"2020-05-25T11:33:13.427Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013110","metadataNamespace":""}},"originalId":["DansKnawCris::0224aae28af558f21768dbc6439c7a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Gcube veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Synthegra Archeologie Rapportenreeks P0502381"}],"journal":null} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index 8ef642dd3..bf43016ba 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -794,28 +794,6 @@ }, "value": "FOS: Computer and information sciences" }, - { - "dataInfo": { - "deletedbyinference": false, - "inferenceprovenance": "", - "inferred": false, - "invisible": false, - "provenanceaction": { - "classid": "sysimport:crosswalk:datasetarchive", - "classname": "sysimport:crosswalk:datasetarchive", - "schemeid": "dnet:provenanceActions", - "schemename": "dnet:provenanceActions" - }, - "trust": "0.9" - }, - "qualifier": { - "classid": "keyword", - "classname": "keyword", - "schemeid": "dnet:subject_classification_typologies", - "schemename": "dnet:subject_classification_typologies" - }, - "value": "0101 mathematics" - }, { "dataInfo": { "deletedbyinference": false, @@ -831,8 +809,8 @@ "trust": "0.9" }, "qualifier": { - "classid": "keyword", - "classname": "keyword", + "classid": "FOS", + "classname": "Fields of Science and Technology classification", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies" }, @@ -910,8 +888,8 @@ "inferred": false, "invisible": false, "provenanceaction": { - "classid": "sysimport:actionset", - "classname": "Harvested", + "classid": "subject:fos", + "classname": "subject:fos", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions" }, @@ -923,7 +901,7 @@ "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies" }, - "value": "Avicennia" + "value": "0102 computer and information sciences" }, { "dataInfo": { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_record_base.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_record_base.xml new file mode 100644 index 000000000..6ae4a7f80 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_record_base.xml @@ -0,0 +1,129 @@ + + +
+ base_oa_____::7ecf1ef502253efffe203ca9a22bb9f1 + ftunivqespace:oai:espace.library.uq.edu.au:UQ:336902 + 2020-12-22T10:30:27Z + 2024-09-10T17:21:36.972Z +
+ + + https://doi.org/10.1016/j.envint.2014.07.004 + + https://espace.library.uq.edu.au/view/UQ:336902 + ftunivqespace:oai:espace.library.uq.edu.au:UQ:336902 + + + Article contribution + + The role of environmental factors in the spatial distribution of Japanese encephalitis in mainland China + + + + Wang, Liya + + + Hu, Wenbiao + + + Soares Magalhaes, Ricardo J. + + + Bi, Peng + + + Ding, Fan + + + Sun, Hailong + + + Li, Shenlong + + + Yin, Wenwu + + + Wei, Lan + + + Liu, Qiyong + + + Haque, Ubydul + + + Sun, Yansong + + + Huang, Liuyu + + + Tong, Shilu + + + Clements, Archie C.A. + + + Zhang, Wenyi + + + Li, Chengyi + + + + + Japanese encephalitis (JE) is the most common cause of viral encephalitis and an important public health concern in the Asia-Pacific region, particularly in China where 50% of global cases are notified. To explore the association between environmental factors and human JE cases and identify the high risk areas for JE transmission in China, we used annual notified data on JE cases at the center of administrative township and environmental variables with a pixel resolution of 1. km. ×. 1. km from 2005 to 2011 to construct models using ecological niche modeling (ENM) approaches based on maximum entropy. These models were then validated by overlaying reported human JE case localities from 2006 to 2012 onto each prediction map. ENMs had good discriminatory ability with the area under the curve (AUC) of the receiver operating curve (ROC) of 0.82-0.91, and low extrinsic omission rate of 5.44-7.42%. Resulting maps showed JE being presented extensively throughout southwestern and central China, with local spatial variations in probability influenced by minimum temperatures, human population density, mean temperatures, and elevation, with contribution of 17.94%-38.37%, 15.47%-21.82%, 3.86%-21.22%, and 12.05%-16.02%, respectively. Approximately 60% of JE cases occurred in predicted high risk areas, which covered less than 6% of areas in mainland China. Our findings will help inform optimal geographical allocation of the limited resources available for JE prevention and control in China, find hidden high-risk areas, and increase the effectiveness of public health interventions against JE transmission. + + + Japanese encephalitis + Ecological niche model + MaxEnt + China + 2300 Environmental Science + 950 + + Pergamon Press + 2014 + + eng + + + 0001 + UNKNOWN + 10.1163/qwerty + 0.1163/18763308-90001038 + https://doi.org/10.1016/j.envint.2014.07.004 + https://doi.org/10.1080/09672567.2013.792375 + http://doi.org/10.1080/08673487.2012.812376 + http://dx.doi.org/10.1090/08673487.2012.812376 + https://espace.library.uq.edu.au/view/UQ:336902 + ftunivqespace:oai:espace.library.uq.edu.au:UQ:336902 + + + 2014-12-01 + ror_________::https://ror.org/00rqy9422 + + false + false + 0.89 + + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json index cac6e5a32..204ba6aeb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json @@ -130,5 +130,10 @@ "value": [ "Pippo", "Foo" ] + }, + { + "field": "typology", + "type": "string", + "value": "Government" } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/dataset/dataset_10.json.gz b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/dataset/dataset_10.json.gz index 0da3c4071..e112002d6 100644 Binary files a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/dataset/dataset_10.json.gz and b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/dataset/dataset_10.json.gz differ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/otherresearchproduct/otherresearchproduct_10.json.gz b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/otherresearchproduct/otherresearchproduct_10.json.gz index 20b6a4dba..b32268d6a 100644 Binary files a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/otherresearchproduct/otherresearchproduct_10.json.gz and b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/otherresearchproduct/otherresearchproduct_10.json.gz differ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/person/person_10.json.gz b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/person/person_10.json.gz new file mode 100644 index 000000000..eec652430 Binary files /dev/null and b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/person/person_10.json.gz differ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/publication/publication_10.json.gz b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/publication/publication_10.json.gz index 257e0db3a..09389a719 100644 Binary files a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/publication/publication_10.json.gz and b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/publication/publication_10.json.gz differ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/software/software_10.json.gz b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/software/software_10.json.gz index a5b8c8774..e59ae6402 100644 Binary files a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/software/software_10.json.gz and b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/sample/software/software_10.json.gz differ diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala index 204fe9794..f01c117cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala @@ -4,12 +4,13 @@ import eu.dnetlib.dhp.schema.sx.scholix.ScholixResource import eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SparkSession} -import org.junit.jupiter.api.Test +import org.junit.jupiter.api.{Disabled, Test} import org.objenesis.strategy.StdInstantiatorStrategy class ScholixGenerationTest { @Test + @Disabled def generateScholix(): Unit = { val spark: SparkSession = SparkSession.builder().master("local[*]").getOrCreate() diff --git a/pom.xml b/pom.xml index 666ba2350..e1d99f25b 100644 --- a/pom.xml +++ b/pom.xml @@ -937,7 +937,7 @@ 1.1.3 1.7 1.0.7 - [7.0.1] + [8.0.1] cdh5.9.2 3.5 11.0.2