From 1ba582de3c9625417032294cd9212400d4e12663 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 23 Nov 2023 16:27:19 +0100 Subject: [PATCH 01/19] [graph cleaning] added cleaning for result.publisher and result.instance.license --- .../oaf/utils/GraphCleaningFunctions.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 7a8acbd36c..d397985066 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -14,6 +14,7 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; +import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm; import org.apache.commons.lang3.StringUtils; import com.github.sisyphsu.dateparser.DateParserUtils; @@ -28,6 +29,10 @@ import me.xuender.unidecode.Unidecode; public class GraphCleaningFunctions extends CleaningFunctions { + public static final String DNET_PUBLISHERS = "dnet:publishers"; + + public static final String DNET_LICENSES = "dnet:licenses"; + public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})"; public static final int ORCID_LEN = 19; public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; @@ -407,6 +412,13 @@ public class GraphCleaningFunctions extends CleaningFunctions { .getPublisher() .getValue() .replaceAll(NAME_CLEANING_REGEX, " ")); + + if (vocs.vocabularyExists(DNET_PUBLISHERS)) { + vocs.find(DNET_PUBLISHERS) + .map(voc -> voc.getTermBySynonym(r.getPublisher().getValue())) + .map(VocabularyTerm::getName) + .ifPresent(publisher -> r.getPublisher().setValue(publisher)); + } } } if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) { @@ -567,6 +579,13 @@ public class GraphCleaningFunctions extends CleaningFunctions { i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS)); } + if (Objects.nonNull(i.getLicense()) && Objects.nonNull(i.getLicense().getValue())) { + vocs.find(DNET_LICENSES) + .map(voc -> voc.getTermBySynonym(i.getLicense().getValue())) + .map(VocabularyTerm::getId) + .ifPresent(license -> i.getLicense().setValue(license)); + } + // from the script from Dimitris if ("0000".equals(i.getRefereed().getClassid())) { final boolean isFromCrossref = Optional From 321922772b71c70e074eb97efb46d920f1031752 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 5 Dec 2023 16:37:04 +0100 Subject: [PATCH 02/19] added serialization for the new fields imported for the Irish tender --- .../oa/provision/utils/XmlRecordFactory.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 3b810ba8c2..4d9d9c341f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -434,6 +434,24 @@ public class XmlRecordFactory implements Serializable { XmlSerializationUtils .asXmlElement("processingchargecurrency", 
r.getProcessingchargecurrency().getValue())); } + if (r.getIsGreen() != null) { + metadata + .add(XmlSerializationUtils.asXmlElement("isgreen", Boolean.toString(r.getIsGreen()))); + } + if (r.getOpenAccessColor() != null) { + metadata + .add(XmlSerializationUtils.asXmlElement("openaccesscolor", r.getOpenAccessColor().toString())); + } + if (r.getIsInDiamondJournal() != null) { + metadata + .add( + XmlSerializationUtils + .asXmlElement("isindiamondjournal", Boolean.toString(r.getIsInDiamondJournal()))); + } + if (r.getPubliclyFunded() != null) { + metadata + .add(XmlSerializationUtils.asXmlElement("publiclyfunded", Boolean.toString(r.getPubliclyFunded()))); + } } switch (type) { From 431c6bb08a2713df4cb1dbe0abeb60011c409349 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 6 Dec 2023 11:06:46 +0100 Subject: [PATCH 03/19] [dedup] added isLookupUrl to the graph consistency workflow definition, required now by the entity grouping phase --- .../dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml index 5e2fc0a016..306229e79d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml @@ -16,6 +16,10 @@ filterInvisible whether filter out invisible entities after merge + + isLookupUrl + the URL address of the lookUp service + sparkDriverMemory heap memory for driver process @@ -128,6 +132,7 @@ --graphInputPath${graphBasePath} --checkpointPath${workingPath}/grouped_entities --outputPath${graphOutputPath} + --isLookupUrl${isLookupUrl} --filterInvisible${filterInvisible} From 259c69e446c855a3f3da8cfec532f2ac0e2ff87e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 6 Dec 2023 19:41:53 +0100 Subject: [PATCH 04/19] [orcid enrichment] fixed workflow definition --- .../dhp/enrich/orcid/oozie_app/workflow.xml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml index a4330b966b..ce117b5e9a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml @@ -38,16 +38,12 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --orcidPath - ${orcidPath} - --targetPath - ${targetPath} - --graphPath - ${graphPath} - --master - yarn + --orcidPath${orcidPath} + --targetPath${targetPath} + --graphPath${graphPath} + --masteryarn - + From c0cde53bf647d00bf5d633ef0ef906b4984d35b5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 7 Dec 2023 10:08:35 +0100 Subject: [PATCH 05/19] [bulktagging] setting first step of bulktaggin as the copy of the entities and relations not involved in the tagging' --- .../resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml index 0d4d1f046d..03373eda0b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml @@ -29,7 +29,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From 336fb31d8778cbfbb75a555165b55ee48518a9ba Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 7 Dec 2023 10:27:25 +0100 Subject: [PATCH 06/19] [community_result_propagation] adjusting starting poit of workflow --- .../resulttocommunityfromorganization/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml index dfa762ac67..e342bce231 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -25,7 +25,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From c381bacee01b7f5a3918e0e77d4a55d8e969c04b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Dec 2023 14:07:11 +0100 Subject: [PATCH 07/19] [enrichment] passing the community API base URL --- .../resulttocommunityfromproject/oozie_app/workflow.xml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml index 21cc2d8874..d0784c94d3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml @@ -4,11 +4,14 @@ sourcePath the source path - outputPath the output path + + baseURL + the community API base URL + @@ -104,7 +107,7 @@ --sourcePath${sourcePath}/relation --outputPath${workingDir}/preparedInfo/resultCommunityList - --production${production} + --baseURL${baseURL} From 70eb1796b20c1001d4c49655e72dbbe0c791252a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Dec 2023 14:08:04 +0100 Subject: [PATCH 08/19] logging typo --- .../resulttocommunityfromproject/PrepareResultCommunitySet.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java index 7fed2606b4..467e11a969 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java @@ -53,7 +53,7 @@ public class PrepareResultCommunitySet { log.info("outputPath: {}", outputPath); final String 
baseURL = parser.get("baseURL"); - log.info("baseUEL: {}", baseURL); + log.info("baseURL: {}", baseURL); final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL); From cb71a7936bb98f498693fe75335c60947f5cbef6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Dec 2023 23:09:54 +0100 Subject: [PATCH 09/19] [graph cleaning] avoid stack overflow error when navigating Oaf objects declaring an Enum --- .../dhp/oa/graph/clean/OafCleaner.java | 1 + .../graph/clean/CleanGraphSparkJobTest.java | 32 +++++++++++++++++++ .../dhp/oa/graph/clean/graph/orp/orp.json | 1 + 3 files changed, 34 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/graph/orp/orp.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java index 102a1fa853..4cfec6fe71 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java @@ -59,6 +59,7 @@ public class OafCleaner implements Serializable { private static boolean isPrimitive(Object o) { return Objects.isNull(o) || o.getClass().isPrimitive() + || o.getClass().isEnum() || o instanceof Class || o instanceof Integer || o instanceof Double diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJobTest.java index a1a4c29cd3..4ae3f82c2f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJobTest.java @@ -690,6 +690,38 @@ public class CleanGraphSparkJobTest { } + @Test + void testClean_ORP() throws Exception { + final String prefix = "gcube "; + + new CleanGraphSparkJob( + args( + "/eu/dnetlib/dhp/oa/graph/input_clean_graph_parameters.json", + new String[] { + "--inputPath", graphInputPath + "/orp", + "--outputPath", graphOutputPath + "/orp", + "--isLookupUrl", "lookupurl", + "--graphTableClassName", OtherResearchProduct.class.getCanonicalName(), + "--deepClean", "true", + "--contextId", "sobigdata", + "--verifyParam", "gCube ", + "--masterDuplicatePath", dsMasterDuplicatePath, + "--country", "NL", + "--verifyCountryParam", "10.17632", + "--collectedfrom", "NARCIS", + "--hostedBy", Objects + .requireNonNull( + getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/clean/hostedBy")) + .getPath() + })).run(false, isLookUpService); + + Dataset orp = read(spark, graphOutputPath + "/orp", OtherResearchProduct.class); + + assertEquals(1, orp.count()); + + } + @Test void testCleanCfHbSparkJob() throws Exception { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/graph/orp/orp.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/graph/orp/orp.json new file mode 100644 index 0000000000..85045e3612 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/graph/orp/orp.json @@ -0,0 +1 @@ 
+{"collectedfrom":[{"key":"10|openaire____::fffd45256148b1de3114788f21179083","value":"JAIRO","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.8","inferenceprovenance":"dedup-result-decisiontree-v3","provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1701811731058,"id":"50|dedup_wf_001::5ff833ef4a38d08e840935d5c52fc479","originalId":["oai:irdb.nii.ac.jp:01292:0003046643","50|jairo_______::5ff833ef4a38d08e840935d5c52fc479","oai:irdb.nii.ac.jp:0108/00008334","50|jairo_______::f542b980618630c53c3f8052588dca25"],"pid":[],"dateofcollection":"2023-07-12T02:35:49.772Z","dateoftransformation":"2023-07-12T13:51:55.356Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2023-07-12T02:35:49.772Z","altered":true,"baseURL":"https%3A%2F%2Firdb.nii.ac.jp%2Foai","identifier":"oai:irdb.nii.ac.jp:01292:0003046643","datestamp":"2023-07-08T10:54:57Z","metadataNamespace":""}},"measures":[{"id":"influence","unit":[{"key":"score","value":"3.244804E-9","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"key":"class","value":"C5","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity","unit":[{"key":"score","value":"6.0055183E-10","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"key":"class","value":"C5","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"influence_alt","unit":[{"key":"score","value":"0","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"key":"class","value":"C5","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"key":"class","value":"C5","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"impulse","unit":[{"key":"score","value":"0","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"key":"class","value":"C5","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]}],"processingchargeamount":null,"processingchargecurrency":null,"author":[{"fullname":"Shimazaki, Kazushi","name":"Kazushi","surname":"Shimazaki","rank":2,"pid":[],"affiliation":null}],"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"metaResourceType":null,"language":{"classid":"jpn","classname":"Japanese","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Reinforced concrete structure","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"damage level","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"柱","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"shear span 
ratio","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"損傷レベル","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"column","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"鉄筋コンクリート構造","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"残存軸耐力","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Residual Axis Load Capacity","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"せん断スパン比","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Eximental study odual axis loadity of reinforced concrete column","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"RC造柱の地震後の残存軸耐力に関する研究","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Shear failure of columns may lead to collapse for existence buildings.After a severe earthquake,some buildings collapsed by loss of axial capacity of the columns,however, some buildings have stayed even many columns were fractured by shear.For the examinaton of building collapse,it is required to investigate the axial load capacity of the columns after shear failure and to analyze the building performance using the pertinent analytical model for the columns.This paper investigates the analytical model for the collapse analysis of a building based on Yoshimura’s concept of the failure surface contraction. The reduction rate was obtained as a function of the maximum deformation,shear margin ratio,and so on,using the regression analydid of experimental results. The tested results are explained by this formula with good agreement.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Article","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"研究報告書ページ(113)-(118)","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"学術フロンティア研究プロジェクト(2005年度~2009年度) 2008年度 
研究成果報告書","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2008-03-31","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"神奈川大学","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[{"value":"https://kanagawa-u.repo.nii.ac.jp/record/4315/files/2008-28.pdf","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"http://klibredb.lib.kanagawa-u.ac.jp/dspace/bitstream/10487/10662/1/2008-28.pdf","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"format":[{"value":"application/pdf","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0000","classname":"Unknown","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"instanceTypeMapping":[{"originalType":"Research 
Paper","typeCode":null,"typeLabel":null,"vocabularyName":"openaire::coar_resource_types_3_1"}],"hostedby":{"key":"10|openaire____::fffd45256148b1de3114788f21179083","value":"JAIRO","dataInfo":null},"url":["https://kanagawa-u.repo.nii.ac.jp/records/4315"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fffd45256148b1de3114788f21179083","value":"JAIRO","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2008-03-31","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0002","classname":"nonPeerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null,"fulltext":"https://kanagawa-u.repo.nii.ac.jp/record/4315/files/2008-28.pdf"},{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0000","classname":"Unknown","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"instanceTypeMapping":[{"originalType":"Article","typeCode":"http://purl.org/coar/resource_type/c_2df8fbb1","typeLabel":"research article","vocabularyName":"openaire::coar_resource_types_3_1"},{"originalType":"http://purl.org/coar/resource_type/c_2df8fbb1","typeCode":"Article","typeLabel":"Article","vocabularyName":"openaire::user_resource_types"}],"hostedby":{"key":"10|openaire____::fffd45256148b1de3114788f21179083","value":"JAIRO","dataInfo":null},"url":["http://hdl.handle.net/10487/10662"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fffd45256148b1de3114788f21179083","value":"JAIRO","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2008-03-31","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0002","classname":"nonPeerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null,"fulltext":"http://klibredb.lib.kanagawa-u.ac.jp/dspace/bitstream/10487/10662/1/2008-28.pdf"}],"eoscifguidelines":[],"openAccessColor":"bronze","publiclyFunded":false,"contactperson":[],"contactgroup":[],"tool":[],"isGreen":true,"isInDiamondJournal":false} \ No newline at end of file From aba95ed1d17a1683729ebddf2afff785bcb5e138 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 8 Dec 2023 17:06:19 +0100 Subject: [PATCH 10/19] code formatting --- .../oaf/utils/GraphCleaningFunctions.java | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 2c1bc48e36..e6ae379089 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -16,7 +16,6 @@ import java.util.function.Function; import 
java.util.stream.Collectors; import java.util.stream.Stream; -import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm; import org.apache.commons.lang3.StringUtils; import com.github.sisyphsu.dateparser.DateParserUtils; @@ -24,6 +23,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -416,10 +416,11 @@ public class GraphCleaningFunctions extends CleaningFunctions { .replaceAll(NAME_CLEANING_REGEX, " ")); if (vocs.vocabularyExists(DNET_PUBLISHERS)) { - vocs.find(DNET_PUBLISHERS) - .map(voc -> voc.getTermBySynonym(r.getPublisher().getValue())) - .map(VocabularyTerm::getName) - .ifPresent(publisher -> r.getPublisher().setValue(publisher)); + vocs + .find(DNET_PUBLISHERS) + .map(voc -> voc.getTermBySynonym(r.getPublisher().getValue())) + .map(VocabularyTerm::getName) + .ifPresent(publisher -> r.getPublisher().setValue(publisher)); } } } @@ -582,10 +583,11 @@ public class GraphCleaningFunctions extends CleaningFunctions { } if (Objects.nonNull(i.getLicense()) && Objects.nonNull(i.getLicense().getValue())) { - vocs.find(DNET_LICENSES) - .map(voc -> voc.getTermBySynonym(i.getLicense().getValue())) - .map(VocabularyTerm::getId) - .ifPresent(license -> i.getLicense().setValue(license)); + vocs + .find(DNET_LICENSES) + .map(voc -> voc.getTermBySynonym(i.getLicense().getValue())) + .map(VocabularyTerm::getId) + .ifPresent(license -> i.getLicense().setValue(license)); } // from the script from Dimitris From 7e8eff40c1660d5d277fdc36ff87d2e264e6549d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 12 Dec 2023 08:54:15 +0100 Subject: [PATCH 11/19] [graph provision] added tests for the new model fields --- .../dhp/oa/provision/XmlIndexingJobTest.java | 34 ++- .../oa/provision/XmlRecordFactoryTest.java | 8 +- .../eu/dnetlib/dhp/oa/provision/fields.xml | 242 ++++++++++-------- .../dnetlib/dhp/oa/provision/publication.json | 10 +- .../conf/exploreTestConfig/managed-schema | 18 +- .../solr/conf/testConfig/managed-schema | 6 + 6 files changed, 183 insertions(+), 135 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index 6f19565784..47f0ec8c39 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.provision; import java.io.IOException; import java.io.StringReader; import java.net.URI; +import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; @@ -11,6 +12,7 @@ import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrInputField; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -30,6 +32,8 @@ import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import 
eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.assertEquals; + @ExtendWith(MockitoExtension.class) public class XmlIndexingJobTest extends SolrTest { @@ -101,14 +105,34 @@ public class XmlIndexingJobTest extends SolrTest { new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null) .run(isLookupClient); - Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*")); - Assertions - .assertEquals( + assertEquals( nRecord, rsp.getResults().getNumFound(), "the number of indexed records should be equal to the number of input records"); + + + rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isgreen:true")); + assertEquals( + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isgreen = true"); + + rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze")); + assertEquals( + 0, rsp.getResults().getNumFound(), + "the number of indexed records having openaccesscolor = bronze"); + + rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true")); + assertEquals( + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isindiamondjournal = true"); + + rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); + assertEquals( + 0, rsp.getResults().getNumFound(), + "the number of indexed records having publiclyfunded = publiclyfunded"); } @Test @@ -126,7 +150,7 @@ public class XmlIndexingJobTest extends SolrTest { .map(s -> new SAXReader().read(new StringReader(s)).valueOf(ID_XPATH)) .distinct() .count(); - Assertions.assertEquals(nRecord, xmlIdUnique, "IDs should be unique among input records"); + assertEquals(nRecord, xmlIdUnique, "IDs should be unique among input records"); final String outputPath = workingDir.resolve("outputPath").toAbsolutePath().toString(); new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.HDFS, outputPath) @@ -142,7 +166,7 @@ public class XmlIndexingJobTest extends SolrTest { }, Encoders.STRING()) .distinct() .count(); - Assertions.assertEquals(xmlIdUnique, docIdUnique, "IDs should be unique among the output records"); + assertEquals(xmlIdUnique, docIdUnique, "IDs should be unique among the output records"); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 88bffd0e79..097d36d054 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -32,7 +32,7 @@ public class XmlRecordFactoryTest { .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test - public void testXMLRecordFactory() throws IOException, DocumentException { + void testXMLRecordFactory() throws IOException, DocumentException { final ContextMapper contextMapper = new ContextMapper(); @@ -93,6 +93,12 @@ public class XmlRecordFactoryTest { "https://osf.io/preprints/socarxiv/7vgtu/download", doc.valueOf("//*[local-name() = 'result']/fulltext[1]")); + assertEquals("true", 
doc.valueOf("//*[local-name() = 'result']/isgreen/text()")); + assertEquals("bronze", doc.valueOf("//*[local-name() = 'result']/openaccesscolor/text()")); + assertEquals("true", doc.valueOf("//*[local-name() = 'result']/isindiamondjournal/text()")); + assertEquals("true", doc.valueOf("//*[local-name() = 'result']/publiclyfunded/text()")); + + System.out.println(doc.asXML()); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index 0bf588a57b..513ead1dd0 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -1,116 +1,134 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index 4ba20292c6..a89ec62d55 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -1048,8 +1048,8 @@ "schemename": "dnet:dataCite_resource" }, "refereed": { - "classid": "0001", - "classname": "peerReviewed", + "classid": "0002", + "classname": "nonPeerReviewed", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels" }, @@ -1949,5 +1949,9 @@ "schemename": "dnet:provenanceActions" } } - } + }, + "isGreen": true, + "openAccessColor": "bronze", + "isInDiamondJournal": true, + "publiclyFunded": true } \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema index 39c811f83f..28a4396190 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema @@ -219,20 +219,6 @@ - - - - - - - - - - - - - - @@ -368,6 +354,10 @@ + + + + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig/managed-schema b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig/managed-schema index 977e0b2d72..962e0cc6fd 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig/managed-schema +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig/managed-schema @@ -365,7 +365,13 @@ + + + + + + From 84d54643cf55d86d42cea236569c84a1b2c339bc Mon Sep 17 00:00:00 2001 From: 
Claudio Atzori Date: Tue, 12 Dec 2023 09:57:00 +0100 Subject: [PATCH 12/19] [cleaning] allow enriched orcids to pass the cleaning, rule out non-orcid author pids --- .../dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index e6ae379089..8700ea527f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -689,6 +689,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { .filter(Objects::nonNull) .filter(p -> Objects.nonNull(p.getQualifier())) .filter(p -> StringUtils.isNotBlank(p.getValue())) + .filter(p -> StringUtils.contains(StringUtils.lowerCase(p.getQualifier().getClassid()), ORCID)) .map(p -> { // hack to distinguish orcid from orcid_pending String pidProvenance = getProvenance(p.getDataInfo()); @@ -698,7 +699,8 @@ public class GraphCleaningFunctions extends CleaningFunctions { .toLowerCase() .contains(ModelConstants.ORCID)) { if (pidProvenance - .equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) { + .equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY) || + pidProvenance.equals("ORCID_ENRICHMENT")) { p.getQualifier().setClassid(ModelConstants.ORCID); } else { p.getQualifier().setClassid(ModelConstants.ORCID_PENDING); From 98cce5bfb22c5d761fbf7044834959efdec4e31e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 12 Dec 2023 09:59:05 +0100 Subject: [PATCH 13/19] code formatting --- .../dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 8700ea527f..0124e96fc1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -689,7 +689,9 @@ public class GraphCleaningFunctions extends CleaningFunctions { .filter(Objects::nonNull) .filter(p -> Objects.nonNull(p.getQualifier())) .filter(p -> StringUtils.isNotBlank(p.getValue())) - .filter(p -> StringUtils.contains(StringUtils.lowerCase(p.getQualifier().getClassid()), ORCID)) + .filter( + p -> StringUtils + .contains(StringUtils.lowerCase(p.getQualifier().getClassid()), ORCID)) .map(p -> { // hack to distinguish orcid from orcid_pending String pidProvenance = getProvenance(p.getDataInfo()); From ff924215b8e457b3e05e98792db0c6b3bdd0fa0d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 12 Dec 2023 11:21:30 +0100 Subject: [PATCH 14/19] [graph provision] added tests for new peerreviewed field --- .../provision/IndexRecordTransformerTest.java | 20 ++++++++++++++++++- .../dhp/oa/provision/XmlIndexingJobTest.java | 7 ++++++- .../oa/provision/XmlRecordFactoryTest.java | 2 -- .../eu/dnetlib/dhp/oa/provision/fields.xml | 3 +++ .../conf/exploreTestConfig/managed-schema | 1 + .../solr/conf/testConfig/managed-schema | 1 + 6 files changed, 30 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java 
b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index ce593cf076..e07ba1b4ea 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -78,6 +78,22 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + void testPeerReviewed() throws IOException, TransformerException { + + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); + + final Publication p = load("publication.json", Publication.class); + + final JoinedEntity je = new JoinedEntity<>(p); + final String record = xmlRecordFactory.build(je); + assertNotNull(record); + SolrInputDocument solrDoc = testRecordTransformation(record); + + assertEquals("true", solrDoc.get("peerreviewed").getValue()); + } + @Test public void testRiunet() throws IOException, TransformerException { @@ -184,7 +200,7 @@ public class IndexRecordTransformerTest { } } - private void testRecordTransformation(final String record) throws IOException, TransformerException { + private SolrInputDocument testRecordTransformation(final String record) throws IOException, TransformerException { final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); @@ -200,6 +216,8 @@ public class IndexRecordTransformerTest { Assertions.assertNotNull(xmlDoc); System.out.println(xmlDoc); + + return solrDoc; } private T load(final String fileName, final Class clazz) throws IOException { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index 47f0ec8c39..b62acbac34 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -132,7 +132,12 @@ public class XmlIndexingJobTest extends SolrTest { rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); assertEquals( 0, rsp.getResults().getNumFound(), - "the number of indexed records having publiclyfunded = publiclyfunded"); + "the number of indexed records having publiclyfunded = true"); + + rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "peerreviewed:true")); + assertEquals( + 0, rsp.getResults().getNumFound(), + "the number of indexed records having peerreviewed = true"); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 097d36d054..de69795f88 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -97,8 +97,6 @@ public class XmlRecordFactoryTest { assertEquals("bronze", doc.valueOf("//*[local-name() = 'result']/openaccesscolor/text()")); assertEquals("true", doc.valueOf("//*[local-name() = 'result']/isindiamondjournal/text()")); 
assertEquals("true", doc.valueOf("//*[local-name() = 'result']/publiclyfunded/text()")); - - System.out.println(doc.asXML()); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index 513ead1dd0..ba3fa83df1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -74,6 +74,9 @@ + + + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema index 28a4396190..4e85ca3be1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema @@ -358,6 +358,7 @@ + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig/managed-schema b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig/managed-schema index 962e0cc6fd..e191c6223b 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig/managed-schema +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/testConfig/managed-schema @@ -371,6 +371,7 @@ + From 01ce0b9c7626cb853acd218db011a498cb8d06e2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 15 Dec 2023 12:24:55 +0100 Subject: [PATCH 15/19] [doiboost - preprocess] remove transition to orcid preparation from sequence of steps at the beginning of the workflow --- .../dhp/doiboost/preprocess/oozie_app/workflow.xml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index ed6853229a..309031ee40 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -34,18 +34,6 @@ the MAG working path - - - - inputPathOrcid - the ORCID input path - - - - workingPathOrcid - the ORCID working path - - @@ -65,7 +53,6 @@ ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} - ${wf:conf('resumeFrom') eq 'PreProcessORCID'} From 9d342a47da489d71c3a739b06943a4f09a1225ee Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 18 Dec 2023 11:48:57 +0100 Subject: [PATCH 16/19] updated the transformation Baseline workflow to include mdstore rollback/commit action --- .../dhp/sx/bio/pubmed/oozie_app/workflow.xml | 69 +++++++++++++++++-- .../ebi/SparkCreateBaselineDataFrame.scala | 18 +++-- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index 8915a090bd..30eb414698 
100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + baselineWorkingPath @@ -9,8 +9,12 @@ The IS lookUp service endopoint - targetPath - The target path + mdStoreOutputId + the identifier of the cleaned MDStore + + + mdStoreManagerURI + the path of the cleaned mdstore skipUpdate @@ -19,12 +23,31 @@ - + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionNEW_VERSION + --mdStoreID${mdStoreOutputId} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + yarn @@ -43,16 +66,52 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --workingPath${baselineWorkingPath} - --targetPath${targetPath} + --mdstoreOutputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} --masteryarn --isLookupUrl${isLookupUrl} --hdfsServerUri${nameNode} --skipUpdate${skipUpdate} + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionCOMMIT + --namenode${nameNode} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionROLLBACK + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 8ac8b00bfa..639918151b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -2,9 +2,12 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.collection.CollectionUtils +import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} import eu.dnetlib.dhp.sx.bio.pubmed._ +import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile} import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration @@ -164,11 +167,15 @@ object SparkCreateBaselineDataFrame { val workingPath = parser.get("workingPath") log.info("workingPath: {}", workingPath) - val targetPath = parser.get("targetPath") - log.info("targetPath: {}", targetPath) + val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") + log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion) + + val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) + val outputBasePath = cleanedMdStoreVersion.getHdfsPath + log.info("outputBasePath: {}", outputBasePath) val hdfsServerUri = parser.get("hdfsServerUri") - log.info("hdfsServerUri: {}", targetPath) + log.info("hdfsServerUri: {}", hdfsServerUri) val skipUpdate = parser.get("skipUpdate") 
log.info("skipUpdate: {}", skipUpdate) @@ -216,8 +223,11 @@ object SparkCreateBaselineDataFrame { .map(a => PubMedToOaf.convert(a, vocabularies)) .as[Oaf] .filter(p => p != null), - targetPath + s"$outputBasePath/$MDSTORE_DATA_PATH" ) + val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH") + val mdStoreSize = df.count + writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH") } } From 15fd93a2b66f6829cfac0b1350266664371d1df5 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 18 Dec 2023 12:21:55 +0100 Subject: [PATCH 17/19] uploaded input parameters on CreateBaseline WF --- .../eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json index 8dc8a2aaeb..3ba83764df 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json @@ -2,7 +2,7 @@ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the oaf path ", "paramRequired": true}, + {"paramName":"mo", "paramLongName":"mdstoreOutputVersion", "paramDescription": "the oaf path ", "paramRequired": true}, {"paramName":"s", "paramLongName":"skipUpdate", "paramDescription": "skip update ", "paramRequired": false}, {"paramName":"h", "paramLongName":"hdfsServerUri", "paramDescription": "the working path ", "paramRequired": true} ] \ No newline at end of file From 5011c4d11a4c3884c99d784ed31a336ba89f8bfc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Dec 2023 15:57:26 +0100 Subject: [PATCH 18/19] refactoring after compiletion --- .../provision/IndexRecordTransformerTest.java | 2 +- .../dhp/oa/provision/XmlIndexingJobTest.java | 29 +++++++++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e07ba1b4ea..e728830554 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -82,7 +82,7 @@ public class IndexRecordTransformerTest { void testPeerReviewed() throws IOException, TransformerException { final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, - XmlConverterJob.schemaLocation); + XmlConverterJob.schemaLocation); final Publication p = load("publication.json", Publication.class); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index b62acbac34..a3a140cf64 
100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.IOException; import java.io.StringReader; import java.net.URI; @@ -32,8 +34,6 @@ import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import static org.junit.jupiter.api.Assertions.assertEquals; - @ExtendWith(MockitoExtension.class) public class XmlIndexingJobTest extends SolrTest { @@ -110,34 +110,33 @@ public class XmlIndexingJobTest extends SolrTest { QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*")); assertEquals( - nRecord, rsp.getResults().getNumFound(), - "the number of indexed records should be equal to the number of input records"); - + nRecord, rsp.getResults().getNumFound(), + "the number of indexed records should be equal to the number of input records"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isgreen:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having isgreen = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isgreen = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having openaccesscolor = bronze"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having openaccesscolor = bronze"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having isindiamondjournal = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isindiamondjournal = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having publiclyfunded = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having publiclyfunded = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "peerreviewed:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having peerreviewed = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having peerreviewed = true"); } @Test From 62104790ae63d08946f0e340d67f3182cb469b8d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 21 Dec 2023 12:26:19 +0100 Subject: [PATCH 19/19] added metaresourcetype to the result hive DB view --- .../graph/hive/oozie_app/lib/scripts/postprocessing.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index 149c326faa..748f77b277 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -1,10 +1,10 @@ DROP VIEW IF EXISTS ${hiveDbName}.result; CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.publication p + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.publication p union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.dataset d + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.dataset d union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.software s + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.software s union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.otherresearchproduct o; + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.otherresearchproduct o;
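
Once this last patch is applied and the hive post-processing step rebuilds ${hiveDbName}, the result view exposes the new metaresourcetype column next to resulttype. A minimal sanity-check query is sketched below; it assumes metaresourcetype is materialised as a qualifier struct with a classname field, like the other qualifiers selected by the view, and it is illustrative only, not part of the patch series:

    -- hypothetical check: group the merged result view by the new
    -- metaresourcetype and by resulttype, counting rows per pair
    SELECT r.metaresourcetype.classname AS meta_type,
           r.resulttype.classname       AS result_type,
           COUNT(*)                     AS n
    FROM ${hiveDbName}.result r
    GROUP BY r.metaresourcetype.classname, r.resulttype.classname
    ORDER BY n DESC;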