workingPath /data/bioschema/disprot/ the working path rdfInput nquads.seq rdf output of scraping workflow output json-datacite/ oozie.launcher.mapreduce.map.java.opts -Xmx4g spark2RdfConversionMaxExecutors 50 sparkDriverMemory 7G memory for driver process sparkExecutorMemory 2G memory for individual executor spark2ExtraListeners com.cloudera.spark.lineage.NavigatorAppListener spark 2.* extra listeners classname spark2YarnHistoryServerAddress spark 2.* yarn history server address spark2EventLogDir spark 2.* event log dir location ${jobTracker} ${nameNode} Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] yarn-cluster cluster NquadsToDataciteJson eu.dnetlib.dhp.rdfconverter.bioschema.SparkRdfToDatacite dhp-rdfconverter-${projectVersion}.jar --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2RdfConversionMaxExecutors} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --nameNode ${nameNode} --workingPath ${workingPath} --rdfInput ${rdfInput} --output ${output}