forked from D-Net/dnet-hadoop
minor fixes
This commit is contained in:
parent
baed5e3337
commit
c3ad4ab701
|
@ -209,8 +209,8 @@ object SparkProduceHostedByMap {
|
|||
|
||||
|
||||
Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath)
|
||||
.union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold"))
|
||||
.union(doajHostedByDataset(spark, workingDirPath + "/doaj"))
|
||||
.union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json"))
|
||||
.union(doajHostedByDataset(spark, workingDirPath + "/doaj.json"))
|
||||
.flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|"))
|
||||
.map(hbi => toHostedByMap(hbi))(Encoders.STRING)
|
||||
.rdd.saveAsTextFile(outputPath , classOf[GzipCodec])
|
||||
|
|
|
@ -1,13 +1,10 @@
|
|||
|
||||
[
|
||||
|
||||
{
|
||||
"paramName":"fu",
|
||||
"paramLongName":"fileURL",
|
||||
"paramDescription": "the url to download the csv file ",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
||||
{
|
||||
"paramName":"wp",
|
||||
"paramLongName":"workingPath",
|
||||
|
@ -27,9 +24,9 @@
|
|||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "sr",
|
||||
"paramLongName": "replace",
|
||||
"paramDescription": "true if the input file has to be cleaned before parsing",
|
||||
"paramName": "d",
|
||||
"paramLongName": "delimiter",
|
||||
"paramDescription": "csv delimiter character",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
||||
|
|
|
@ -78,7 +78,6 @@
|
|||
</switch>
|
||||
</decision>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
@ -92,7 +91,6 @@
|
|||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<fork name="fork_downloads_csv">
|
||||
<path start="download_gold"/>
|
||||
<path start="download_doaj"/>
|
||||
|
@ -100,7 +98,7 @@
|
|||
|
||||
<action name="download_gold">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.common.collection.DownloadCSV</main-class>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV</main-class>
|
||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--fileURL</arg><arg>${unibiFileURL}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/unibi_gold</arg>
|
||||
|
@ -113,7 +111,7 @@
|
|||
|
||||
<action name="download_doaj">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.common.collection.DownloadCSV</main-class>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV</main-class>
|
||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--fileURL</arg><arg>${doajFileURL}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/doaj</arg>
|
||||
|
|
Loading…
Reference in New Issue