parquet_path = /var/lib/dnet/mdstore_PROD/md-7763c517-538d-4aa7-83f8-6096b3ce0d96/md-7763c517-538d-4aa7-83f8-6096b3ce0d96-1702622132535/store
    the full path of a Parquet file, or of a directory containing Parquet files, to be processed

openaire_guidelines = 4.0
    the version of the OpenAIRE Guidelines to validate the records against

output_path = /user/${dhp.hadoop.frontend.user.name}/continuous_validation/output
    the path of the output directory where the resulting JSON files will be stored

sparkDriverMemory = 4096M
    memory for the driver process

sparkExecutorMemory = 2048m
    memory for each individual executor

sparkExecutorCores = 4
    number of cores used by a single executor

spark2ExtraListeners = com.cloudera.spark.lineage.NavigatorAppListener
    class name(s) of the Spark 2.* extra listeners

spark2SqlQueryExecutionListeners = com.cloudera.spark.lineage.NavigatorQueryListener
    class name(s) of the Spark 2.* SQL query execution listeners

spark2EventLogDir = spark2/logs
    location of the Spark 2.* event log directory
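In an Oozie workflow, parameters such as these are typically declared in the workflow's `parameters` section (or supplied through a companion job.properties file). The following is a minimal sketch assembled from the names, values, and descriptions above, assuming standard Oozie parameter substitution; only the three application-level parameters are shown:

```xml
<parameters>
    <property>
        <name>parquet_path</name>
        <value>/var/lib/dnet/mdstore_PROD/md-7763c517-538d-4aa7-83f8-6096b3ce0d96/md-7763c517-538d-4aa7-83f8-6096b3ce0d96-1702622132535/store</value>
        <description>the full path of a Parquet file, or of a directory containing Parquet files, to be processed</description>
    </property>
    <property>
        <name>openaire_guidelines</name>
        <value>4.0</value>
        <description>the version of the OpenAIRE Guidelines to validate the records against</description>
    </property>
    <property>
        <name>output_path</name>
        <value>/user/${dhp.hadoop.frontend.user.name}/continuous_validation/output</value>
        <description>the path of the output directory where the resulting JSON files will be stored</description>
    </property>
</parameters>
```

Parameters declared this way can still be overridden at submission time, which is the usual reason for keeping the Spark memory and core settings parameterized rather than hard-coded.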
oozie.libpath = ${nameNode}/user/oozie/share/lib
job-tracker = ${jobTracker}
name-node = ${nameNode}
mapreduce.job.queuename = ${queueName}
oozie.launcher.mapred.job.queue.name = ${oozieLauncherQueueName}
oozie.action.sharelib.for.spark = ${oozieActionShareLibForSpark2}
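These workflow-level settings conventionally live in the Oozie `global` section, so that every action inherits them. The sketch below is an assumption about the layout, not a verbatim excerpt: the `job-tracker` and `name-node` element names are the standard Oozie ones, and `oozie.libpath` is left out because it is normally set in job.properties rather than in the workflow itself:

```xml
<global>
    <job-tracker>${jobTracker}</job-tracker>
    <name-node>${nameNode}</name-node>
    <configuration>
        <property>
            <name>mapreduce.job.queuename</name>
            <value>${queueName}</value>
        </property>
        <property>
            <name>oozie.launcher.mapred.job.queue.name</name>
            <value>${oozieLauncherQueueName}</value>
        </property>
        <property>
            <name>oozie.action.sharelib.for.spark</name>
            <value>${oozieActionShareLibForSpark2}</value>
        </property>
    </configuration>
</global>
```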
Kill-node message: Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
master = yarn
mode = cluster
name = Validate multiple records against OpenAIRE Guidelines
class = eu.dnetlib.dhp.continuous_validation.ContinuousValidation
jar = dhp-continuous-validation-${projectVersion}.jar
spark-opts:
    --executor-memory=${sparkExecutorMemory}
    --conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
    --executor-cores=${sparkExecutorCores}
    --driver-memory=${sparkDriverMemory}
    --conf spark.extraListeners=${spark2ExtraListeners}
    --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
    --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
    --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
    --conf spark.sql.shuffle.partitions=3840
arguments:
    --parquet_path ${parquet_path}
    --openaire_guidelines ${openaire_guidelines}
    --output_path ${output_path}
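Putting the action fields together, the Spark step would look roughly like the following Oozie `spark` action. This is a sketch assembled from the values listed above, assuming the spark-action schema; the action name and the `ok`/`error` transition targets (`End`, `Kill`) are illustrative placeholders, and each flag and its value are passed as separate `arg` elements so Spark receives them as distinct arguments:

```xml
<action name="ContinuousValidation">
    <spark xmlns="uri:oozie:spark-action:0.2">
        <master>yarn</master>
        <mode>cluster</mode>
        <name>Validate multiple records against OpenAIRE Guidelines</name>
        <class>eu.dnetlib.dhp.continuous_validation.ContinuousValidation</class>
        <jar>dhp-continuous-validation-${projectVersion}.jar</jar>
        <spark-opts>
            --executor-memory=${sparkExecutorMemory}
            --conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead}
            --executor-cores=${sparkExecutorCores}
            --driver-memory=${sparkDriverMemory}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            --conf spark.sql.shuffle.partitions=3840
        </spark-opts>
        <arg>--parquet_path</arg><arg>${parquet_path}</arg>
        <arg>--openaire_guidelines</arg><arg>${openaire_guidelines}</arg>
        <arg>--output_path</arg><arg>${output_path}</arg>
    </spark>
    <ok to="End"/>
    <error to="Kill"/>
</action>
```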