diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml
index 69f26b9616..1c6c00e9a7 100644
--- a/dhp-workflows/dhp-blacklist/pom.xml
+++ b/dhp-workflows/dhp-blacklist/pom.xml
@@ -10,6 +10,24 @@
4.0.0
dhp-blacklist
+
+
+ eu.dnetlib.dhp
+ dhp-graph-mapper
+ 1.1.7-SNAPSHOT
+ compile
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ compile
+
+
+ org.apache.hadoop
+ hadoop-common
+ compile
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/blacklist_parameters.json b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/blacklist_parameters.json
index cb13ff0242..9a2eadaa7d 100644
--- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/blacklist_parameters.json
+++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/blacklist_parameters.json
@@ -5,6 +5,12 @@
"paramDescription": "the path where storing the sequential file",
"paramRequired": true
},
+ {
+ "paramName": "nn",
+ "paramLongName": "hdfsNameNode",
+ "paramDescription": "the HDFS name node",
+ "paramRequired": true
+ },
{
"paramName": "pgurl",
"paramLongName": "postgresUrl",
@@ -22,11 +28,5 @@
"paramLongName": "postgresPassword",
"paramDescription": "postgres password",
"paramRequired": false
- },
- {
- "paramName": "a",
- "paramLongName": "action",
- "paramDescription": "process claims",
- "paramRequired": false
}
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml
index 91be13210a..483c0378af 100644
--- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml
@@ -1,34 +1,4 @@
-
-
-
- sourcePath
- the source path
-
-
- sparkDriverMemory
- memory for driver process
-
-
- sparkExecutorMemory
- memory for individual executor
-
-
- sparkExecutorCores
- number of cores used by single executor
-
-
- isLookUpUrl
- the isLookup service endpoint
-
-
- pathMap
- the json path associated to each selection field
-
-
- outputPath
- the output path
-
-
+
@@ -38,190 +8,26 @@
-
-
-
-
-
-
-
-
+
-
+
-
-
-
-
-
-
-
-
-
+
+
${jobTracker}
${nameNode}
- ${nameNode}/${sourcePath}/relation
- ${nameNode}/${outputPath}/relation
-
-
+ eu.dnetlib.dhp.blacklist.ReadBlacklistFromDB
+ --hdfsPath${workingDir}/blacklist
+ --hdfsNameNode${nameNode}
+ --postgresUrl${postgresUrl}
+ --postgresUser${postgresUser}
+ --postgresPassword${postgresPassword}
+
+
-
-
- ${jobTracker}
- ${nameNode}
- ${nameNode}/${sourcePath}/organization
- ${nameNode}/${outputPath}/organization
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- ${nameNode}/${sourcePath}/project
- ${nameNode}/${outputPath}/project
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- ${nameNode}/${sourcePath}/datasource
- ${nameNode}/${outputPath}/datasource
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- yarn-cluster
- cluster
- bulkTagging-publication
- eu.dnetlib.dhp.bulktag.SparkBulkTagJob2
- dhp-bulktag-${projectVersion}.jar
-
- --num-executors=${sparkExecutorNumber}
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}/publication
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
- --outputPath${outputPath}/publication
- --pathMap${pathMap}
- --isLookUpUrl${isLookUpUrl}
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- yarn-cluster
- cluster
- bulkTagging-dataset
- eu.dnetlib.dhp.bulktag.SparkBulkTagJob2
- dhp-bulktag-${projectVersion}.jar
-
- --num-executors=${sparkExecutorNumber}
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}/dataset
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
- --outputPath${outputPath}/dataset
- --pathMap${pathMap}
- --isLookUpUrl${isLookUpUrl}
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- yarn-cluster
- cluster
- bulkTagging-orp
- eu.dnetlib.dhp.bulktag.SparkBulkTagJob2
- dhp-bulktag-${projectVersion}.jar
-
- --num-executors=${sparkExecutorNumber}
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}/otherresearchproduct
- --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
- --outputPath${outputPath}/otherresearchproduct
- --pathMap${pathMap}
- --isLookUpUrl${isLookUpUrl}
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- yarn-cluster
- cluster
- bulkTagging-software
- eu.dnetlib.dhp.bulktag.SparkBulkTagJob2
- dhp-bulktag-${projectVersion}.jar
-
- --num-executors=${sparkExecutorNumber}
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}/software
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
- --outputPath${outputPath}/software
- --pathMap${pathMap}
- --isLookUpUrl${isLookUpUrl}
-
-
-
-
-
\ No newline at end of file