Added resources and workflow for the dump of community products

This commit is contained in:
Miriam Baglioni 2020-06-15 11:13:21 +02:00
parent f96ca900e1
commit e43eedb5b0
3 changed files with 38 additions and 25 deletions

View File

@ -1,10 +1,17 @@
[ [
{
"paramName":"map",
"paramLongName":"communityMap",
"paramDescription": "communityMap when testing",
"paramRequired": false
},
{ {
"paramName":"is", "paramName":"is",
"paramLongName":"isLookUpUrl", "paramLongName":"isLookUpUrl",
"paramDescription": "URL of the isLookUp Service", "paramDescription": "URL of the isLookUp Service",
"paramRequired": true "paramRequired": false
}, },
{ {
"paramName":"s", "paramName":"s",
@ -40,7 +47,7 @@
"paramName":"rt", "paramName":"rt",
"paramLongName":"resultType", "paramLongName":"resultType",
"paramDescription": "the name of the corresondent dump element ", "paramDescription": "the name of the corresondent dump element ",
"paramRequired": true "paramRequired": false
} }
] ]

View File

@ -1,4 +1,4 @@
<workflow-app name="import_graph_as_hive_DB" xmlns="uri:oozie:workflow:0.5"> <workflow-app name="dump_community_products" xmlns="uri:oozie:workflow:0.5">
<parameters> <parameters>
<property> <property>
@ -120,12 +120,12 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${inputPath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--inputType</arg><arg>publication</arg> <arg>--resultType</arg><arg>publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--dumpTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Publication</arg> <arg>--dumpTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/publication</arg> <arg>--outputPath</arg><arg>${workingDir}/publication</arg>
<arg>--isLookUpUrl</arg><arg>${isLoohUpUrl}</arg> <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark> </spark>
<ok to="join_dump"/> <ok to="join_dump"/>
<error to="Kill"/> <error to="Kill"/>
@ -148,12 +148,12 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${inputPath}/dataset</arg> <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--inputType</arg><arg>dataset</arg> <arg>--resultType</arg><arg>dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--dumpTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Dataset</arg> <arg>--dumpTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg> <arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
<arg>--isLookUpUrl</arg><arg>${isLoohUpUrl}</arg> <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark> </spark>
<ok to="join_dump"/> <ok to="join_dump"/>
<error to="Kill"/> <error to="Kill"/>
@ -176,12 +176,12 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${inputPath}/otherresearchproduct</arg> <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--inputType</arg><arg>otherresearchproduct</arg> <arg>--resultType</arg><arg>otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--dumpTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct</arg> <arg>--dumpTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg> <arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--isLookUpUrl</arg><arg>${isLoohUpUrl}</arg> <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark> </spark>
<ok to="join_dump"/> <ok to="join_dump"/>
<error to="Kill"/> <error to="Kill"/>
@ -204,12 +204,12 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${inputPath}/software</arg> <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--inputType</arg><arg>software</arg> <arg>--resultType</arg><arg>software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--dumpTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Software</arg> <arg>--dumpTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/software</arg> <arg>--outputPath</arg><arg>${workingDir}/software</arg>
<arg>--isLookUpUrl</arg><arg>${isLoohUpUrl}</arg> <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark> </spark>
<ok to="join_dump"/> <ok to="join_dump"/>
<error to="Kill"/> <error to="Kill"/>
@ -234,7 +234,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${inputPath}</arg> <arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="fork_extendWithProject"/> <ok to="fork_extendWithProject"/>
@ -317,7 +317,7 @@
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/otherresearchproduct</arg> <arg>--sourcePath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/ext/otherresearchproduct</arg> <arg>--outputPath</arg><arg>${workingDir}/ext/orp</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
@ -380,8 +380,8 @@
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/ext/publication</arg> <arg>--sourcePath</arg><arg>${workingDir}/ext/publication</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg> <arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Software</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Publication</arg>
<arg>--isLookUpUrl</arg><arg>${isLoohUpUrl}</arg> <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark> </spark>
<ok to="join_split"/> <ok to="join_split"/>
<error to="Kill"/> <error to="Kill"/>
@ -406,8 +406,8 @@
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/ext/dataset</arg> <arg>--sourcePath</arg><arg>${workingDir}/ext/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg> <arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Dataset</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Dataset</arg>
<arg>--isLookUpUrl</arg><arg>${isLoohUpUrl}</arg> <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark> </spark>
<ok to="join_split"/> <ok to="join_split"/>
<error to="Kill"/> <error to="Kill"/>
@ -431,8 +431,8 @@
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/ext/orp</arg> <arg>--sourcePath</arg><arg>${workingDir}/ext/orp</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg> <arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct</arg>
<arg>--isLookUpUrl</arg><arg>${isLoohUpUrl}</arg> <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark> </spark>
<ok to="join_split"/> <ok to="join_split"/>
<error to="Kill"/> <error to="Kill"/>
@ -456,8 +456,8 @@
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/ext/software</arg> <arg>--sourcePath</arg><arg>${workingDir}/ext/software</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg> <arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Software</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.dump.oaf.Software</arg>
<arg>--isLookUpUrl</arg><arg>${isLoohUpUrl}</arg> <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark> </spark>
<ok to="join_split"/> <ok to="join_split"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -29,6 +29,12 @@
"paramLongName":"resultTableName", "paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on", "paramDescription": "the name of the result table we are currently working on",
"paramRequired": true "paramRequired": true
},
{
"paramName":"map",
"paramLongName":"communityMap",
"paramDescription": "communityMap when testing",
"paramRequired": false
} }
] ]