use the Oozie prepare element to clean up working directories

2020-03-30 19:48:41 +02:00 · 2020-03-30 19:48:41 +02:00 · 0fbec69b82
parent 3af2b8d700
commit 0fbec69b82
2 changed files with 4 additions and 5 deletions
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SparkXmlRecordBuilderJob.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SparkXmlRecordBuilderJob.java
@ -29,14 +29,9 @@ public class SparkXmlRecordBuilderJob {
            final String otherDsTypeId = parser.get("otherDsTypeId");

            final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
-            if (fs.exists(new Path(outputPath))) {
-                fs.delete(new Path(outputPath), true);
-                fs.mkdirs(new Path(outputPath));
-            }

            new GraphJoiner(spark, ContextMapper.fromIS(isLookupUrl), otherDsTypeId, inputPath, outputPath)
                    .adjacencyLists();
-                    //.asXML();
        }
    }

--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml
@ -58,6 +58,10 @@

    <action name="adjancency_lists">
        <spark xmlns="uri:oozie:spark-action:0.2">
+            <prepare>
+                <delete path="${outputPath}"/>
+                <mkdir path="${outputPath}"/>
+            </prepare>
            <master>yarn</master>
            <mode>cluster</mode>
            <name>build_adjacency_lists</name>