diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json
index 27bb097f9d..2baec0e687 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json
@@ -35,6 +35,12 @@
"paramLongName": "preparedInfoPath",
"paramDescription": "the path where prepared info have been stored",
"paramRequired": true
+ },
+ {
+ "paramName":"test",
+ "paramLongName":"isTest",
+ "paramDescription": "true if it is executing a test",
+ "paramRequired": false
}
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json
index f61db6f346..3ba3c8e9c7 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json
@@ -1,34 +1,10 @@
[
- {
- "paramName":"is",
- "paramLongName":"isLookupUrl",
- "paramDescription": "URL of the isLookUp Service",
- "paramRequired": true
- },
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
- {
- "paramName":"as",
- "paramLongName":"allowedsemrels",
- "paramDescription": "the allowed semantic relations for propagation",
- "paramRequired": true
- },
- {
- "paramName":"h",
- "paramLongName":"hive_metastore_uris",
- "paramDescription": "the hive metastore uris",
- "paramRequired": true
- },
- {
- "paramName":"sg",
- "paramLongName":"saveGraph",
- "paramDescription": "true if the new version of the graph must be saved",
- "paramRequired": false
- },
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
@@ -40,11 +16,5 @@
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
- },
- {
- "paramName":"tn",
- "paramLongName":"resultTableName",
- "paramDescription": "the name of the result table we are currently working on",
- "paramRequired": true
- }
+ }
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json
index 90b5974ea4..a5dfefc0a0 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json
@@ -5,12 +5,6 @@
"paramDescription": "URL of the isLookUp Service",
"paramRequired": true
},
- {
- "paramName":"mt",
- "paramLongName":"master",
- "paramDescription": "should be local or yarn",
- "paramRequired": true
- },
{
"paramName":"s",
"paramLongName":"sourcePath",
@@ -30,16 +24,21 @@
"paramRequired": true
},
{
- "paramName":"wu",
- "paramLongName":"writeUpdate",
- "paramDescription": "true if the update must be writte. No double check if information is already present",
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
- "paramName":"sg",
- "paramLongName":"saveGraph",
- "paramDescription": "true if the new version of the graph must be saved",
- "paramRequired": true
- }
-
+ "paramName":"tn",
+ "paramLongName":"resultTableName",
+ "paramDescription": "the name of the result table we are currently working on",
+ "paramRequired": true
+ }
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml
index ea3a4d9223..2744ea92ba 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml
@@ -19,4 +19,40 @@
hive_metastore_uris
thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
+
+ spark2YarnHistoryServerAddress
+ http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
+
+
+ spark2EventLogDir
+ /user/spark/spark2ApplicationHistory
+
+
+ spark2ExtraListeners
+ com.cloudera.spark.lineage.NavigatorAppListener
+
+
+ spark2SqlQueryExecutionListeners
+ com.cloudera.spark.lineage.NavigatorQueryListener
+
+
+ sparkExecutorNumber
+ 4
+
+
+ sparkDriverMemory
+ 15G
+
+
+ sparkExecutorMemory
+ 6G
+
+
+ sparkExecutorCores
+ 1
+
+
+ spark2MaxExecutors
+ 50
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
index 4312ec0682..d320bc9eb4 100644
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
@@ -8,54 +8,301 @@
allowedsemrels
the semantic relationships allowed for propagation
-
- sparkDriverMemory
- memory for driver process
-
-
- sparkExecutorMemory
- memory for individual executor
-
-
- sparkExecutorCores
- number of cores used by single executor
-
isLookupUrl
the isLookup service endpoint
-
+
-
- Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
+
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+ ${nameNode}/${sourcePath}/relation
+ ${nameNode}/${workingDir}/projecttoresult_propagation/relation
+
+
+
+
-
-
- ${jobTracker}
- ${nameNode}
- yarn-cluster
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ ResultToCommunitySemRel-PreparePhase1-Publications
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --sourcePath${sourcePath}
+ --hive_metastore_uris${hive_metastore_uris}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath${workingDir}/preparedInfo/targetCommunityAssoc
+ --allowedsemrels${allowedsemrels}
+ --isLookupUrl${isLookupUrl}
+
+
+
+
+
+
+ yarn
+ cluster
+ ResultToCommunitySemRel-PreparePhase1-Dataset
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --sourcePath${sourcePath}
+ --hive_metastore_uris${hive_metastore_uris}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath${workingDir}/preparedInfo/targetCommunityAssoc
+ --allowedsemrels${allowedsemrels}
+ --isLookupUrl${isLookupUrl}
+
+
+
+
+
+
+ yarn
+ cluster
+ ResultToCommunitySemRel-PreparePhase1-ORP
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --sourcePath${sourcePath}
+ --hive_metastore_uris${hive_metastore_uris}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath${workingDir}/preparedInfo/targetCommunityAssoc
+ --allowedsemrels${allowedsemrels}
+ --isLookupUrl${isLookupUrl}
+
+
+
+
+
+
+ yarn
+ cluster
+ ResultToCommunitySemRel-PreparePhase1-Software
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --sourcePath${sourcePath}
+ --hive_metastore_uris${hive_metastore_uris}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
+ --outputPath${workingDir}/preparedInfo/targetCommunityAssoc
+ --allowedsemrels${allowedsemrels}
+ --isLookupUrl${isLookupUrl}
+
+
+
+
+
+
+
+
+
+ yarn
cluster
- ResultToCommunitySemRelPropagation
- eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob
+ ResultToCommunitySemRelPropagation-PreparePhase2
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2
dhp-propagation-${projectVersion}.jar
- --executor-memory ${sparkExecutorMemory}
- --executor-cores ${sparkExecutorCores}
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
- --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
- -mt yarn-cluster
- --sourcePath${sourcePath}
- --allowedsemrels${allowedsemrels}
- --hive_metastore_uris${hive_metastore_uris}
- --isLookupUrl${isLookupUrl}
+ --sourcePath${workingDir}/preparedInfo/targetCommunityAssoc
+ --outputPath${workingDir}/preparedInfo/mergedCommunityAssoc
-
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Result2CommunitySemRelPropagation-Publication
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob4
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc
+ --sourcePath${sourcePath}/publication
+ --hive_metastore_uris${hive_metastore_uris}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath${workingDir}/publication
+
+
+
+
+
+
+ yarn
+ cluster
+ Result2CommunitySemRelPropagation-Dataset
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob4
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc
+ --sourcePath${sourcePath}/dataset
+ --hive_metastore_uris${hive_metastore_uris}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath${workingDir}/dataset
+
+
+
+
+
+
+ yarn
+ cluster
+ Result2CommunitySemRelPropagation-ORP
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob4
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc
+ --sourcePath${sourcePath}/otherresearchproduct
+ --hive_metastore_uris${hive_metastore_uris}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath${workingDir}/otherresearchproduct
+
+
+
+
+
+
+ yarn
+ cluster
+ Result2CommunitySemRelPropagation-Software
+ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob4
+ dhp-propagation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.dynamicAllocation.enabled=true
+ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+
+ --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc
+ --sourcePath${sourcePath}/software
+ --hive_metastore_uris${hive_metastore_uris}
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
+ --outputPath${workingDir}/software
+
+
+
+
+
+
\ No newline at end of file