This commit is contained in:
Miriam Baglioni 2020-07-09 11:46:32 +02:00
parent b1dad7959c
commit d77d213d7c
8 changed files with 83 additions and 41 deletions

View File

@@ -101,12 +101,12 @@ public class GCatAPIClient {
OkHttpClient httpClient = new OkHttpClient();
Request request = new Request.Builder()
.url(getGcatBaseURL() + itemPath + "?offset=" + offset + "&limit=" + limit)
.header("gcube-token", getApplicationToken())
.addHeader("Content-Type", "application/json")
.addHeader("Accept", "application/json")
.get()
.build();
.url(getGcatBaseURL() + itemPath + "?offset=" + offset + "&limit=" + limit)
.header("gcube-token", getApplicationToken())
.addHeader("Content-Type", "application/json")
.addHeader("Accept", "application/json")
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
int status = response.code();
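
For orientation, a minimal sketch of the request pattern in this hunk as a standalone helper; the wrapper class, the "items" path and the way the JSON body is returned are assumptions for illustration, not part of the commit.

import java.io.IOException;

import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;

public class GCatListSketch {

	// Hypothetical helper: issues the same kind of GET shown above and returns the raw JSON body.
	public static String list(String gcatBaseURL, String applicationToken, int offset, int limit)
		throws IOException {
		OkHttpClient httpClient = new OkHttpClient();
		Request request = new Request.Builder()
			.url(gcatBaseURL + "items?offset=" + offset + "&limit=" + limit) // "items" path is an assumption
			.header("gcube-token", applicationToken)
			.addHeader("Content-Type", "application/json")
			.addHeader("Accept", "application/json")
			.get()
			.build();
		try (Response response = httpClient.newCall(request).execute()) {
			if (!response.isSuccessful()) {
				throw new IOException("unexpected HTTP status: " + response.code());
			}
			return response.body().string();
		}
	}
}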

View File

@@ -28,13 +28,14 @@ public class SendToCatalogue implements Serializable {
.toString(
SendToCatalogue.class
.getResourceAsStream(
"/eu/dnetlib/dhp/blacklist/catalogue_parameters.json")));
"/eu/dnetlib/dhp/oa/graph/gcat/catalogue_parameters.json")));
parser.parseArgument(args);
final String access_token = parser.get("accessToken");
final String access_token = parser.get("gcattoken");
final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("hdfsNameNode");
final String gcatBaseUrl = parser.get("gcatBaseUrl");
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
@@ -46,6 +47,7 @@ public class SendToCatalogue implements Serializable {
new Path(hdfsPath), true);
GCatAPIClient gCatAPIClient = new GCatAPIClient();
gCatAPIClient.setApplicationToken(access_token);
gCatAPIClient.setGcatBaseURL(gcatBaseUrl);
int purged = gCatAPIClient.purgeAll();
log.info("purged: " + purged);
while (fileStatusListIterator.hasNext()) {
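
Read together, the two SendToCatalogue hunks change the parameter file, the token argument and the client configuration; a hedged sketch of the resulting flow follows, with argument names and client calls taken from the diff and the publish loop body omitted because the commit does not show it.

	// Sketch only: argument names and client calls come from the hunks above,
	// error handling and the loop body are left out.
	final String access_token = parser.get("gcattoken");
	final String hdfsPath = parser.get("hdfsPath");
	final String hdfsNameNode = parser.get("hdfsNameNode");
	final String gcatBaseUrl = parser.get("gcatBaseUrl");

	GCatAPIClient gCatAPIClient = new GCatAPIClient();
	gCatAPIClient.setApplicationToken(access_token);
	gCatAPIClient.setGcatBaseURL(gcatBaseUrl);

	int purged = gCatAPIClient.purgeAll();
	log.info("purged: " + purged);

	while (fileStatusListIterator.hasNext()) {
		// publish each dumped file found under hdfsPath (body not shown in this commit)
	}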

View File

@@ -35,7 +35,7 @@ public class SparkDumpRISISCatalogue implements Serializable {
.toString(
SparkDumpRISISCatalogue.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/gcat/catalogue_parameters.json"));
"/eu/dnetlib/dhp/oa/graph/gcat/dump_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
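
The only change in SparkDumpRISISCatalogue is the classpath resource it loads; the surrounding pattern, sketched below assuming the `.toString(...)` call belongs to commons-io IOUtils (not visible in this excerpt), stays the same.

	// Sketch of the resource-loading pattern in this hunk; IOUtils is assumed to be
	// org.apache.commons.io.IOUtils.
	String jsonConfiguration = IOUtils
		.toString(
			SparkDumpRISISCatalogue.class
				.getResourceAsStream(
					"/eu/dnetlib/dhp/oa/graph/gcat/dump_parameters.json"));

	final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
	parser.parseArgument(args);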

View File

@@ -23,4 +23,5 @@
<name>hiveDbName</name>
<value>openaire</value>
</property>
</configuration>

View File

@@ -1,35 +1,27 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramName":"gct",
"paramLongName":"gcattoken",
"paramDescription": "the token for the deposition on the catalogue",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramName": "p",
"paramLongName": "hdfsPath",
"paramDescription": "the path where storing the sequential file",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramName": "nn",
"paramLongName": "hdfsNameNode",
"paramDescription": "the name node on hdfs",
"paramRequired": true
},
{
"paramName":"cm",
"paramLongName":"communityName",
"paramDescription": "the name of the community for which to execute the dump to the catalogue",
"paramName": "gbu",
"paramLongName": "gcatBaseUrl",
"paramDescription": "the base url for the catalogue",
"paramRequired": true
}
]
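
The rewritten parameter file above means the catalogue loader is now driven by four flags; a hypothetical invocation is sketched below, with flag names taken from the file and from the workflow further down, and every value a placeholder.

	// Hypothetical argument list for SendToCatalogue; only the flag names come
	// from catalogue_parameters.json, all values are placeholders.
	String[] args = new String[] {
		"--gcattoken", "<gcube-application-token>",
		"--hdfsPath", "/user/dnet/catalogue_dump",
		"--hdfsNameNode", "hdfs://nameservice1",
		"--gcatBaseUrl", "https://gcat.example.org/"
	};
	SendToCatalogue.main(args);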

View File

@@ -0,0 +1,38 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"cm",
"paramLongName":"communityName",
"paramDescription": "the name of the community for which to execute the dump to the catalogue",
"paramRequired": true
}
]
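
dump_parameters.json, added above, describes the Spark dump step rather than the catalogue loader; a hypothetical argument set mirroring the publication branch of the workflow below is sketched here, with paths and the community name as placeholders.

	// Hypothetical arguments for SparkDumpRISISCatalogue; the flag names and the
	// resultTableName value come from the parameter file and the workflow,
	// paths and the community name are placeholders.
	String[] args = new String[] {
		"--isSparkSessionManaged", Boolean.FALSE.toString(),
		"--sourcePath", "/tmp/graph/publication",
		"--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
		"--outputPath", "/tmp/workingDir/publication",
		"--communityName", "risis"
	};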

View File

@@ -23,4 +23,8 @@
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@@ -5,14 +5,18 @@
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>communityName</name>
<description>The name of the community for which to execute the dump for the catalogue</description>
</property>
<property>
<name>gcattoken</name>
<description>the access token for the deposition on the catalogue</description>
</property>
<property>
<name>gcatBaseUrl</name>
<description>the baseUrl to access the catalogue</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
@@ -88,8 +92,8 @@
<action name="reset_outputpath">
<fs>
<delete path="${outputPath}"/>
<mkdir path="${outputPath}"/>
<delete path="${workingDir}"/>
<mkdir path="${workingDir}"/>
</fs>
<ok to="fork_dump"/>
<error to="Kill"/>
@@ -122,7 +126,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
@@ -148,7 +152,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
@@ -174,7 +178,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
@@ -200,7 +204,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
@@ -212,9 +216,10 @@
<action name="populate_catalogue">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.gcat.SendToCatalogue</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/blacklist</arg>
<arg>--hdfsPath</arg><arg>${workingDir}</arg>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
<arg>--gcattoken</arg><arg>${gcattoken}</arg>
<arg>--gcatBaseUrl</arg><arg>${gcatBaseUrl}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>