1
0
Fork 0

adjustments for country propagation

This commit is contained in:
Miriam Baglioni 2023-12-22 11:27:30 +01:00
parent 4740c808f7
commit 3afd4aa57b
8 changed files with 130 additions and 18 deletions

View File

@ -45,7 +45,7 @@ public class PrepareDatasourceCountryAssociation {
.toString( .toString(
PrepareDatasourceCountryAssociation.class PrepareDatasourceCountryAssociation.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json")); "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
@ -90,7 +90,8 @@ public class PrepareDatasourceCountryAssociation {
(FilterFunction<Datasource>) ds -> !ds.getDataInfo().getDeletedbyinference() && (FilterFunction<Datasource>) ds -> !ds.getDataInfo().getDeletedbyinference() &&
Optional.ofNullable(ds.getDatasourcetype()).isPresent() && Optional.ofNullable(ds.getDatasourcetype()).isPresent() &&
Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent() && Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent() &&
(allowedtypes.contains(ds.getJurisdiction().getClassid()) || ((Optional.ofNullable(ds.getJurisdiction()).isPresent() &&
allowedtypes.contains(ds.getJurisdiction().getClassid())) ||
whitelist.contains(ds.getId()))); whitelist.contains(ds.getId())));
// filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass

View File

@ -32,7 +32,7 @@ public class PrepareResultCountrySet {
.toString( .toString(
PrepareResultCountrySet.class PrepareResultCountrySet.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json")); "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);

View File

@ -35,7 +35,7 @@ public class SparkCountryPropagationJob {
.toString( .toString(
SparkCountryPropagationJob.class SparkCountryPropagationJob.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json")); "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);

View File

@ -60,7 +60,7 @@ public class PrepareInfo implements Serializable {
.toString( .toString(
SparkResultToOrganizationFromIstRepoJob.class SparkResultToOrganizationFromIstRepoJob.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json")); "/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);

View File

@ -0,0 +1,32 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequential file to read",
"paramRequired": true
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "p",
"paramLongName": "preparedInfoPath",
"paramDescription": "the path where the prepared info has been stored",
"paramRequired": false
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
}
]

View File

@ -0,0 +1,32 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequential file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "whitelist",
"paramDescription": "the datasources whose type is not among the allowed ones but that we want to include anyway",
"paramRequired": true
},
{
"paramName": "at",
"paramLongName": "allowedtypes",
"paramDescription": "the allowed datasource types for country propagation",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
}
]

View File

@ -0,0 +1,38 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequential file to read",
"paramRequired": true
},
{
"paramName":"out",
"paramLongName":"outputPath",
"paramDescription": "the output path",
"paramRequired": true
},
{
"paramName":"w",
"paramLongName":"workingPath",
"paramDescription": "the working path",
"paramRequired": true
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName": "p",
"paramLongName": "preparedInfoPath",
"paramDescription": "the path where the prepared info has been stored",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
}
]

View File

@ -61,7 +61,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}</arg> <arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--whitelist</arg><arg>${whitelist}</arg> <arg>--whitelist</arg><arg>${whitelist}</arg>
<arg>--allowedtypes</arg><arg>${allowedtypes}</arg> <arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
<arg>--workingPath</arg><arg>${workingDir}/country</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="fork_prepare_result_country"/> <ok to="fork_prepare_result_country"/>
<error to="Kill"/> <error to="Kill"/>
@ -95,8 +95,10 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--workingPath</arg><arg>${workingDir}/country</arg> <arg>--outputPath</arg><arg>${workingDir}/publication</arg>
<arg>--workingPath</arg><arg>${workingDir}/workingP</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="wait_prepare"/> <ok to="wait_prepare"/>
<error to="Kill"/> <error to="Kill"/>
@ -123,8 +125,10 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg> <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--workingPath</arg><arg>${workingDir}/country</arg> <arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
<arg>--workingPath</arg><arg>${workingDir}/workingD</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="wait_prepare"/> <ok to="wait_prepare"/>
<error to="Kill"/> <error to="Kill"/>
@ -151,8 +155,10 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg> <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--workingPath</arg><arg>${workingDir}/country</arg> <arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--workingPath</arg><arg>${workingDir}/workingO</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="wait_prepare"/> <ok to="wait_prepare"/>
<error to="Kill"/> <error to="Kill"/>
@ -179,14 +185,16 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg> <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--workingPath</arg><arg>${workingDir}/country</arg> <arg>--outputPath</arg><arg>${workingDir}/software</arg>
<arg>--workingPath</arg><arg>${workingDir}/workingS</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="wait_prepare"/> <ok to="wait_prepare"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<join name="wait_prepare" to="fork_join_apply_country_propagation"/> <join name="wait_prepare" to="fork_apply_country_propagation"/>
<fork name="fork_apply_country_propagation"> <fork name="fork_apply_country_propagation">
<path start="propagation_publication"/> <path start="propagation_publication"/>
@ -216,9 +224,9 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--workingPath</arg><arg>${workingDir}/country</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/country/publication</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
@ -245,9 +253,9 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg> <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--workingPath</arg><arg>${workingDir}/country</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/country/dataset</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
@ -274,9 +282,9 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg> <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--workingPath</arg><arg>${workingDir}/country</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/country/otherresearchproduct</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
@ -303,8 +311,9 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg> <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--workingPath</arg><arg>${workingDir}/country</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/country/software</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>