BIPAffiliations: also include input data from publisher websites

This commit is contained in:
Claudio Atzori 2024-08-07 15:46:44 +02:00
parent 152cb47375
commit ce78752aa3
3 changed files with 23 additions and 16 deletions

View File

@ -28,18 +28,19 @@
"paramLongName": "dataciteInputPath",
"paramDescription": "the path to get the input data from Datacite",
"paramRequired": true
},{
},
{
"paramName": "wip",
"paramLongName": "webCrawlInputPath",
"paramDescription": "the path to get the input data from Web Crawl",
"paramRequired": true
},{
},
{
"paramName": "pip",
"paramLongName": "publisherInputPath",
"paramDescription": "the path to get the input data from publishers",
"paramRequired": true
}
,
},
{
"paramName": "o",
"paramLongName": "outputPath",

View File

@ -21,6 +21,10 @@
<name>webCrawlInputPath</name>
<description>the path where to find the inferred affiliation relations from webCrawl</description>
</property>
<property>
<name>publisherInputPath</name>
<description>the path where to find the inferred affiliation relations from publisher websites</description>
</property>
<property>
<name>outputPath</name>
<description>the path where to store the actionset</description>
@ -117,6 +121,7 @@
<arg>--openapcInputPath</arg><arg>${openapcInputPath}</arg>
<arg>--dataciteInputPath</arg><arg>${dataciteInputPath}</arg>
<arg>--webCrawlInputPath</arg><arg>${webCrawlInputPath}</arg>
<arg>--publisherInputPath</arg><arg>${publisherInputPath}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>

View File

@ -150,11 +150,12 @@ public class PrepareAffiliationRelationsTest {
.get(0)
.getString(4));
final String publisherid = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s00217-010-1268-9"));
final String publisherid = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s00217-010-1268-9"));
final String rorId = "20|ror_________::" + IdentifierFactory.md5("https://ror.org/03265fv13");
Assertions.assertEquals(1, execVerification.filter("source = '" + publisherid + "' and target = '" + rorId +"'").count()
);
Assertions
.assertEquals(
1, execVerification.filter("source = '" + publisherid + "' and target = '" + rorId + "'").count());
}
}