1
0
Fork 0

minor fixes

This commit is contained in:
Claudio Atzori 2021-08-13 12:23:15 +02:00
parent baed5e3337
commit c3ad4ab701
3 changed files with 7 additions and 12 deletions

View File

@ -209,8 +209,8 @@ object SparkProduceHostedByMap {
Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath)
.union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold"))
.union(doajHostedByDataset(spark, workingDirPath + "/doaj"))
.union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json"))
.union(doajHostedByDataset(spark, workingDirPath + "/doaj.json"))
.flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|"))
.map(hbi => toHostedByMap(hbi))(Encoders.STRING)
.rdd.saveAsTextFile(outputPath , classOf[GzipCodec])

View File

@ -1,13 +1,10 @@
[
{
"paramName":"fu",
"paramLongName":"fileURL",
"paramDescription": "the url to download the csv file ",
"paramRequired": true
},
{
"paramName":"wp",
"paramLongName":"workingPath",
@ -27,9 +24,9 @@
"paramRequired": true
},
{
"paramName": "sr",
"paramLongName": "replace",
"paramDescription": "true if the input file has to be cleaned before parsing",
"paramName": "d",
"paramLongName": "delimiter",
"paramDescription": "csv delimiter character",
"paramRequired": false
}
]

View File

@ -78,7 +78,6 @@
</switch>
</decision>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
@ -92,7 +91,6 @@
<error to="Kill"/>
</action>
<fork name="fork_downloads_csv">
<path start="download_gold"/>
<path start="download_doaj"/>
@ -100,7 +98,7 @@
<action name="download_gold">
<java>
<main-class>eu.dnetlib.dhp.common.collection.DownloadCSV</main-class>
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${unibiFileURL}</arg>
<arg>--workingPath</arg><arg>${workingDir}/unibi_gold</arg>
@ -113,7 +111,7 @@
<action name="download_doaj">
<java>
<main-class>eu.dnetlib.dhp.common.collection.DownloadCSV</main-class>
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${doajFileURL}</arg>
<arg>--workingPath</arg><arg>${workingDir}/doaj</arg>