From ce78752aa33629f82c401443de88a53ba8ea1c69 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 7 Aug 2024 15:46:44 +0200 Subject: [PATCH] BIPAffiliations to include also input data from publisher websites --- .../input_actionset_parameter.json | 25 ++++++++++--------- .../bipaffiliations/oozie_app/workflow.xml | 5 ++++ .../PrepareAffiliationRelationsTest.java | 9 ++++--- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json index 941f84525..f80d9e446 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json @@ -28,18 +28,19 @@ "paramLongName": "dataciteInputPath", "paramDescription": "the path to get the input data from Datacite", "paramRequired": true - },{ - "paramName": "wip", - "paramLongName": "webCrawlInputPath", - "paramDescription": "the path to get the input data from Web Crawl", - "paramRequired": true -},{ - "paramName": "pip", - "paramLongName": "publisherInputPath", - "paramDescription": "the path to get the input data from publishers", - "paramRequired": true -} -, + }, + { + "paramName": "wip", + "paramLongName": "webCrawlInputPath", + "paramDescription": "the path to get the input data from Web Crawl", + "paramRequired": true + }, + { + "paramName": "pip", + "paramLongName": "publisherInputPath", + "paramDescription": "the path to get the input data from publishers", + "paramRequired": true + }, { "paramName": "o", "paramLongName": "outputPath", diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml index 2e89c07fd..77683b6b3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml @@ -21,6 +21,10 @@ webCrawlInputPath the path where to find the inferred affiliation relations from webCrawl + + publisherInputPath + the path where to find the inferred affiliation relations from publisher websites + outputPath the path where to store the actionset @@ -117,6 +121,7 @@ --openapcInputPath${openapcInputPath} --dataciteInputPath${dataciteInputPath} --webCrawlInputPath${webCrawlInputPath} + --publisherInputPath${publisherInputPath} --outputPath${outputPath} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java index c90288f59..ac9977a7e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java @@ -150,11 +150,12 @@ public class PrepareAffiliationRelationsTest { .get(0) .getString(4)); - - final String publisherid = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s00217-010-1268-9")); + final String publisherid = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s00217-010-1268-9")); final String rorId = "20|ror_________::" + IdentifierFactory.md5("https://ror.org/03265fv13"); - Assertions.assertEquals(1, execVerification.filter("source = '" + publisherid + "' and target = '" + rorId +"'").count() - ); + Assertions + .assertEquals( + 1, execVerification.filter("source = '" + publisherid + "' and target = '" + rorId + "'").count()); } }