diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index d0c603e29..1ee1d5d1a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -209,8 +209,8 @@ object SparkProduceHostedByMap { Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) - .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) - .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) + .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json")) + .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index fba048343..cf417c675 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -1,13 +1,10 @@ - [ - { "paramName":"fu", "paramLongName":"fileURL", "paramDescription": "the url to download the csv file ", "paramRequired": true }, - { "paramName":"wp", "paramLongName":"workingPath", @@ -27,9 +24,9 @@ "paramRequired": true }, { - "paramName": "sr", - "paramLongName": "replace", - "paramDescription": "true if the input file has to be cleaned before parsing", + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "csv delimiter character", "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index d7b85b0cb..870b4ba0f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -78,7 +78,6 @@ - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -92,7 +91,6 @@ - @@ -100,7 +98,7 @@ - eu.dnetlib.dhp.common.collection.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold @@ -113,7 +111,7 @@ - eu.dnetlib.dhp.common.collection.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj