BIPAffiliations to also include input data from publisher websites

This commit is contained in:
Claudio Atzori 2024-08-07 15:46:44 +02:00
parent 152cb47375
commit ce78752aa3
3 changed files with 23 additions and 16 deletions

View File

@ -28,18 +28,19 @@
"paramLongName": "dataciteInputPath", "paramLongName": "dataciteInputPath",
"paramDescription": "the path to get the input data from Datacite", "paramDescription": "the path to get the input data from Datacite",
"paramRequired": true "paramRequired": true
},{ },
{
"paramName": "wip", "paramName": "wip",
"paramLongName": "webCrawlInputPath", "paramLongName": "webCrawlInputPath",
"paramDescription": "the path to get the input data from Web Crawl", "paramDescription": "the path to get the input data from Web Crawl",
"paramRequired": true "paramRequired": true
},{ },
{
"paramName": "pip", "paramName": "pip",
"paramLongName": "publisherInputPath", "paramLongName": "publisherInputPath",
"paramDescription": "the path to get the input data from publishers", "paramDescription": "the path to get the input data from publishers",
"paramRequired": true "paramRequired": true
} },
,
{ {
"paramName": "o", "paramName": "o",
"paramLongName": "outputPath", "paramLongName": "outputPath",

View File

@ -21,6 +21,10 @@
<name>webCrawlInputPath</name> <name>webCrawlInputPath</name>
<description>the path where to find the inferred affiliation relations from webCrawl</description> <description>the path where to find the inferred affiliation relations from webCrawl</description>
</property> </property>
<property>
<name>publisherInputPath</name>
<description>the path where to find the inferred affiliation relations from publisher websites</description>
</property>
<property> <property>
<name>outputPath</name> <name>outputPath</name>
<description>the path where to store the actionset</description> <description>the path where to store the actionset</description>
@ -117,6 +121,7 @@
<arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg> <arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg>
<arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg> <arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg>
<arg>--webCrawlInputPath</arg><arg>${webCrawlInputPath}</arg> <arg>--webCrawlInputPath</arg><arg>${webCrawlInputPath}</arg>
<arg>--publisherInputPath</arg><arg>${publisherInputPath}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg> <arg>--outputPath</arg><arg>${outputPath}</arg>
</spark> </spark>
<ok to="End"/> <ok to="End"/>

View File

@ -150,11 +150,12 @@ public class PrepareAffiliationRelationsTest {
.get(0) .get(0)
.getString(4)); .getString(4));
final String publisherid = ID_PREFIX
final String publisherid = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s00217-010-1268-9")); + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s00217-010-1268-9"));
final String rorId = "20|ror_________::" + IdentifierFactory.md5("https://ror.org/03265fv13"); final String rorId = "20|ror_________::" + IdentifierFactory.md5("https://ror.org/03265fv13");
Assertions.assertEquals(1, execVerification.filter("source = '" + publisherid + "' and target = '" + rorId +"'").count() Assertions
); .assertEquals(
1, execVerification.filter("source = '" + publisherid + "' and target = '" + rorId + "'").count());
} }
} }