From e8b3e972f23f7f08ea650b0c09882ac9b5a0dabe Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Jun 2020 11:25:05 +0200 Subject: [PATCH] changed the input params and the workflow definition to tackle the Result as all result product produced --- .../dhp/oa/graph/dump/input_parameters.json | 12 - .../dhp/oa/graph/dump/oozie_app/workflow.xml | 255 +++++++++--------- .../graph/dump/project_input_parameters.json | 6 - .../dhp/oa/graph/dump/split_parameters.json | 6 - 4 files changed, 135 insertions(+), 144 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json index 23d0c56a0..188b2f452 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json @@ -36,18 +36,6 @@ "paramLongName":"resultTableName", "paramDescription": "the name of the result table we are currently working on", "paramRequired": true - }, - { - "paramName":"dn", - "paramLongName":"dumpTableName", - "paramDescription": "the name of the corresondent dump element ", - "paramRequired": true - }, - { - "paramName":"rt", - "paramLongName":"resultType", - "paramDescription": "the name of the corresondent dump element ", - "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml index 5f04731d6..be3156e0d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml @@ -121,9 +121,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/publication - --resultTypepublication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --dumpTableNameeu.dnetlib.dhp.schema.dump.oaf.Publication --outputPath${workingDir}/publication --isLookUpUrl${isLookUpUrl} @@ -149,9 +147,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/dataset - --resultTypedataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --dumpTableNameeu.dnetlib.dhp.schema.dump.oaf.Dataset --outputPath${workingDir}/dataset --isLookUpUrl${isLookUpUrl} @@ -177,9 +173,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/otherresearchproduct - --resultTypeotherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --dumpTableNameeu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct --outputPath${workingDir}/otherresearchproduct --isLookUpUrl${isLookUpUrl} @@ -205,9 +199,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/software - --resultTypesoftware --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --dumpTableNameeu.dnetlib.dhp.schema.dump.oaf.Software --outputPath${workingDir}/software --isLookUpUrl${isLookUpUrl} @@ -267,7 +259,6 @@ --sourcePath${workingDir}/publication --outputPath${workingDir}/ext/publication - --resultTableNameeu.dnetlib.dhp.schema.dump.oaf.Publication --preparedInfoPath${workingDir}/preparedInfo @@ -293,7 +284,6 @@ --sourcePath${workingDir}/dataset --outputPath${workingDir}/ext/dataset - --resultTableNameeu.dnetlib.dhp.schema.dump.oaf.Dataset --preparedInfoPath${workingDir}/preparedInfo @@ -318,7 +308,6 @@ --sourcePath${workingDir}/otherresearchproduct --outputPath${workingDir}/ext/orp - --resultTableNameeu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct --preparedInfoPath${workingDir}/preparedInfo @@ -343,128 +332,154 @@ --sourcePath${workingDir}/software --outputPath${workingDir}/ext/software - --resultTableNameeu.dnetlib.dhp.schema.dump.oaf.Software --preparedInfoPath${workingDir}/preparedInfo + - + + + yarn + cluster + Split dumped result for community + eu.dnetlib.dhp.oa.graph.dump.SparkSplitForCommunity + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/ext + --outputPath${outputPath} + + --isLookUpUrl${isLookUpUrl} + + + + + + - - - - - - + + + + + + - - - yarn - cluster - Split dumped result for community - eu.dnetlib.dhp.oa.graph.dump.SparkSplitForCommunity - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext/publication - --outputPath${outputPath} - --resultTableNameeu.dnetlib.dhp.schema.dump.oaf.Publication - --isLookUpUrl${isLookUpUrl} - - - - + + + + + + + + + + + + + + + + + + + + + + + + + - - - yarn - cluster - Split dumped result for community - eu.dnetlib.dhp.oa.graph.dump.SparkSplitForCommunity - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext/dataset - --outputPath${outputPath} - --resultTableNameeu.dnetlib.dhp.schema.dump.oaf.Dataset - --isLookUpUrl${isLookUpUrl} - - - - - - - yarn - cluster - Split dumped result for community - eu.dnetlib.dhp.oa.graph.dump.SparkSplitForCommunity - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext/orp - --outputPath${outputPath} - --resultTableNameeu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct - --isLookUpUrl${isLookUpUrl} - - - - - - - yarn - cluster - Split dumped result for community - eu.dnetlib.dhp.oa.graph.dump.SparkSplitForCommunity - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext/software - --outputPath${outputPath} - --resultTableNameeu.dnetlib.dhp.schema.dump.oaf.Software - --isLookUpUrl${isLookUpUrl} - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json index e82801602..a13657b2a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json @@ -19,12 +19,6 @@ "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, { "paramName": "pip", "paramLongName": "preparedInfoPath", diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json index bf842e740..27df63e42 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json @@ -24,12 +24,6 @@ "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, { "paramName":"map", "paramLongName":"communityMap",