1
0
Fork 0

minor fixes

This commit is contained in:
Claudio Atzori 2021-08-13 12:23:15 +02:00
parent baed5e3337
commit c3ad4ab701
3 changed files with 7 additions and 12 deletions

View File

@ -209,8 +209,8 @@ object SparkProduceHostedByMap {
Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath)
.union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json"))
.union(doajHostedByDataset(spark, workingDirPath + "/doaj")) .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json"))
.flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|"))
.map(hbi => toHostedByMap(hbi))(Encoders.STRING) .map(hbi => toHostedByMap(hbi))(Encoders.STRING)
.rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) .rdd.saveAsTextFile(outputPath , classOf[GzipCodec])

View File

@ -1,13 +1,10 @@
[ [
{ {
"paramName":"fu", "paramName":"fu",
"paramLongName":"fileURL", "paramLongName":"fileURL",
"paramDescription": "the url to download the csv file ", "paramDescription": "the url to download the csv file ",
"paramRequired": true "paramRequired": true
}, },
{ {
"paramName":"wp", "paramName":"wp",
"paramLongName":"workingPath", "paramLongName":"workingPath",
@ -27,9 +24,9 @@
"paramRequired": true "paramRequired": true
}, },
{ {
"paramName": "sr", "paramName": "d",
"paramLongName": "replace", "paramLongName": "delimiter",
"paramDescription": "true if the input file has to be cleaned before parsing", "paramDescription": "csv delimiter character",
"paramRequired": false "paramRequired": false
} }
] ]

View File

@ -78,7 +78,6 @@
</switch> </switch>
</decision> </decision>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>
@ -92,7 +91,6 @@
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<fork name="fork_downloads_csv"> <fork name="fork_downloads_csv">
<path start="download_gold"/> <path start="download_gold"/>
<path start="download_doaj"/> <path start="download_doaj"/>
@ -100,7 +98,7 @@
<action name="download_gold"> <action name="download_gold">
<java> <java>
<main-class>eu.dnetlib.dhp.common.collection.DownloadCSV</main-class> <main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${unibiFileURL}</arg> <arg>--fileURL</arg><arg>${unibiFileURL}</arg>
<arg>--workingPath</arg><arg>${workingDir}/unibi_gold</arg> <arg>--workingPath</arg><arg>${workingDir}/unibi_gold</arg>
@ -113,7 +111,7 @@
<action name="download_doaj"> <action name="download_doaj">
<java> <java>
<main-class>eu.dnetlib.dhp.common.collection.DownloadCSV</main-class> <main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${doajFileURL}</arg> <arg>--fileURL</arg><arg>${doajFileURL}</arg>
<arg>--workingPath</arg><arg>${workingDir}/doaj</arg> <arg>--workingPath</arg><arg>${workingDir}/doaj</arg>