graphOutputPath the target path to store raw graph reuseDBClaims false should import content from the aggregator or reuse a previous version reuseODFClaims false should import content from the aggregator or reuse a previous version reuseOAFClaims false should import content from the aggregator or reuse a previous version reuseDB false should import content from the aggregator or reuse a previous version reuseDBOpenorgs false should import content from the aggregator or reuse a previous version reuseODF false should import content from the aggregator or reuse a previous version reuseOAF false should import content from the aggregator or reuse a previous version reuseODF_hdfs false should import content from the aggregator or reuse a previous version reuseOAF_hdfs false should import content from the aggregator or reuse a previous version contentPath path location to store (or reuse) content from the aggregator postgresURL the postgres URL to access the database postgresUser the postgres user postgresPassword the postgres password postgresOpenOrgsURL the postgres URL to access the OpenOrgs database postgresOpenOrgsUser the user of the OpenOrgs database postgresOpenOrgsPassword the password of the OpenOrgs database dbSchema beta the database schema according to the D-Net infrastructure (beta or production) mongoURL mongoDB url, example: mongodb://[username:password@]host[:port] mongoDb mongo database isLookupUrl the address of the lookUp service nsPrefixBlacklist a blacklist of nsprefixes (comma separated) shouldPatchRelations false activates the relation patching phase, driven by the content in ${idMappingPath} idMappingPath path pointing to the relations identifiers mapping dataset sparkDriverMemory memory for driver process sparkExecutorMemory memory for individual executor sparkExecutorCores number of cores used by single executor oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener 
spark 2.* extra listeners classname spark2SqlQueryExecutionListeners com.cloudera.spark.lineage.NavigatorQueryListener spark 2.* sql query execution listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location ${jobTracker} ${nameNode} mapreduce.job.queuename ${queueName} oozie.launcher.mapred.job.queue.name ${oozieLauncherQueueName} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] ${wf:conf('reuseDBClaims') eq false} ${wf:conf('reuseDBClaims') eq true} eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication --hdfsPath${contentPath}/db_claims --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} --isLookupUrl${isLookupUrl} --actionclaims --dbschema${dbSchema} --nsPrefixBlacklist${nsPrefixBlacklist} ${wf:conf('reuseODFClaims') eq false} ${wf:conf('reuseODFClaims') eq true} eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication -p${contentPath}/odf_claims -mongourl${mongoURL} -mongodb${mongoDb} -fODF -lstore -iclaim ${wf:conf('reuseOAFClaims') eq false} ${wf:conf('reuseOAFClaims') eq true} eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication -p${contentPath}/oaf_claims -mongourl${mongoURL} -mongodb${mongoDb} -fOAF -lstore -iclaim ${wf:conf('reuseDB') eq false} ${wf:conf('reuseDB') eq true} eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication --hdfsPath${contentPath}/db_openaire --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} --isLookupUrl${isLookupUrl} --actionopenaire --dbschema${dbSchema} --nsPrefixBlacklist${nsPrefixBlacklist} ${wf:conf('reuseODF') eq false} ${wf:conf('reuseODF') eq true} eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication --hdfsPath${contentPath}/odf_records --mongoBaseUrl${mongoURL} --mongoDb${mongoDb} --mdFormatODF --mdLayoutstore --mdInterpretationcleaned 
${wf:conf('reuseOAF') eq false} ${wf:conf('reuseOAF') eq true} eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication --hdfsPath${contentPath}/oaf_records --mongoBaseUrl${mongoURL} --mongoDb${mongoDb} --mdFormatOAF --mdLayoutstore --mdInterpretationcleaned eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication --hdfsPath${contentPath}/oaf_records_invisible --mongoBaseUrl${mongoURL} --mongoDb${mongoDb} --mdFormatOAF --mdLayoutstore --mdInterpretationintersection ${wf:conf('reuseODF_hdfs') eq false} ${wf:conf('reuseODF_hdfs') eq true} yarn cluster ImportODF_hdfs eu.dnetlib.dhp.oa.graph.raw.MigrateHdfsMdstoresApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --hdfsPath${contentPath}/odf_records_hdfs --mdstoreManagerUrl${mdstoreManagerUrl} --mdFormatODF --mdLayoutstore --mdInterpretationcleaned ${wf:conf('reuseOAF_hdfs') eq false} ${wf:conf('reuseOAF_hdfs') eq true} yarn cluster ImportOAF_hdfs eu.dnetlib.dhp.oa.graph.raw.MigrateHdfsMdstoresApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --hdfsPath${contentPath}/oaf_records_hdfs --mdstoreManagerUrl${mdstoreManagerUrl} --mdFormatOAF --mdLayoutstore --mdInterpretationcleaned ${wf:conf('reuseDBOpenorgs') eq false} ${wf:conf('reuseDBOpenorgs') eq true} 
eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication --hdfsPath${contentPath}/db_openorgs --postgresUrl${postgresOpenOrgsURL} --postgresUser${postgresOpenOrgsUser} --postgresPassword${postgresOpenOrgsPassword} --isLookupUrl${isLookupUrl} --actionopenorgs --dbschema${dbSchema} --nsPrefixBlacklist${nsPrefixBlacklist} yarn cluster GenerateEntities_claim eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePaths${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims,${contentPath}/oaf_records_invisible --targetPath${workingDir}/entities_claim --isLookupUrl${isLookupUrl} --shouldHashId${shouldHashId} --modeclaim yarn cluster GenerateGraph_claims eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${workingDir}/entities_claim --graphRawPath${workingDir}/graph_claims yarn cluster GenerateEntities eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePaths${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_records,${contentPath}/odf_records,${contentPath}/oaf_records_hdfs,${contentPath}/odf_records_hdfs --targetPath${workingDir}/entities --isLookupUrl${isLookupUrl} --shouldHashId${shouldHashId} yarn cluster GenerateGraph eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --sourcePath${workingDir}/entities --graphRawPath${workingDir}/graph_raw yarn cluster MergeClaims_publication eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --rawGraphPath${workingDir}/graph_raw --claimsGraphPath${workingDir}/graph_claims --outputRawGaphPath${graphOutputPath} --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication yarn cluster MergeClaims_dataset eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores 
${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --rawGraphPath${workingDir}/graph_raw --claimsGraphPath${workingDir}/graph_claims --outputRawGaphPath${graphOutputPath} --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset yarn cluster MergeClaims_relation eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 --rawGraphPath${workingDir}/graph_raw --claimsGraphPath${workingDir}/graph_claims --outputRawGaphPath${graphOutputPath} --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Relation yarn cluster MergeClaims_software eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=1920 --rawGraphPath${workingDir}/graph_raw --claimsGraphPath${workingDir}/graph_claims --outputRawGaphPath${graphOutputPath} --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software yarn cluster 
MergeClaims_otherresearchproduct eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=1920 --rawGraphPath${workingDir}/graph_raw --claimsGraphPath${workingDir}/graph_claims --outputRawGaphPath${graphOutputPath} --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct yarn cluster MergeClaims_datasource eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=200 --rawGraphPath${workingDir}/graph_raw --claimsGraphPath${workingDir}/graph_claims --outputRawGaphPath${graphOutputPath} --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource yarn cluster MergeClaims_organization eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=200 
--rawGraphPath${workingDir}/graph_raw --claimsGraphPath${workingDir}/graph_claims --outputRawGaphPath${graphOutputPath} --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization yarn cluster MergeClaims_project eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=200 --rawGraphPath${workingDir}/graph_raw --claimsGraphPath${workingDir}/graph_claims --outputRawGaphPath${graphOutputPath} --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project ${(shouldPatchRelations eq "true") and (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} yarn cluster PatchRelations eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 --graphBasePath${graphOutputPath} --workingDir${workingDir}/patch_relations --idMappingPath${idMappingPath}