forked from D-Net/dnet-hadoop
minor fixes
This commit is contained in:
parent
baed5e3337
commit
c3ad4ab701
|
@ -209,8 +209,8 @@ object SparkProduceHostedByMap {
|
||||||
|
|
||||||
|
|
||||||
Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath)
|
Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath)
|
||||||
.union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold"))
|
.union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json"))
|
||||||
.union(doajHostedByDataset(spark, workingDirPath + "/doaj"))
|
.union(doajHostedByDataset(spark, workingDirPath + "/doaj.json"))
|
||||||
.flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|"))
|
.flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|"))
|
||||||
.map(hbi => toHostedByMap(hbi))(Encoders.STRING)
|
.map(hbi => toHostedByMap(hbi))(Encoders.STRING)
|
||||||
.rdd.saveAsTextFile(outputPath , classOf[GzipCodec])
|
.rdd.saveAsTextFile(outputPath , classOf[GzipCodec])
|
||||||
|
|
|
@ -1,13 +1,10 @@
|
||||||
|
|
||||||
[
|
[
|
||||||
|
|
||||||
{
|
{
|
||||||
"paramName":"fu",
|
"paramName":"fu",
|
||||||
"paramLongName":"fileURL",
|
"paramLongName":"fileURL",
|
||||||
"paramDescription": "the url to download the csv file ",
|
"paramDescription": "the url to download the csv file ",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"paramName":"wp",
|
"paramName":"wp",
|
||||||
"paramLongName":"workingPath",
|
"paramLongName":"workingPath",
|
||||||
|
@ -27,9 +24,9 @@
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"paramName": "sr",
|
"paramName": "d",
|
||||||
"paramLongName": "replace",
|
"paramLongName": "delimiter",
|
||||||
"paramDescription": "true if the input file has to be cleaned before parsing",
|
"paramDescription": "csv delimiter character",
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -78,7 +78,6 @@
|
||||||
</switch>
|
</switch>
|
||||||
</decision>
|
</decision>
|
||||||
|
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
@ -92,7 +91,6 @@
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
||||||
<fork name="fork_downloads_csv">
|
<fork name="fork_downloads_csv">
|
||||||
<path start="download_gold"/>
|
<path start="download_gold"/>
|
||||||
<path start="download_doaj"/>
|
<path start="download_doaj"/>
|
||||||
|
@ -100,7 +98,7 @@
|
||||||
|
|
||||||
<action name="download_gold">
|
<action name="download_gold">
|
||||||
<java>
|
<java>
|
||||||
<main-class>eu.dnetlib.dhp.common.collection.DownloadCSV</main-class>
|
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV</main-class>
|
||||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--fileURL</arg><arg>${unibiFileURL}</arg>
|
<arg>--fileURL</arg><arg>${unibiFileURL}</arg>
|
||||||
<arg>--workingPath</arg><arg>${workingDir}/unibi_gold</arg>
|
<arg>--workingPath</arg><arg>${workingDir}/unibi_gold</arg>
|
||||||
|
@ -113,7 +111,7 @@
|
||||||
|
|
||||||
<action name="download_doaj">
|
<action name="download_doaj">
|
||||||
<java>
|
<java>
|
||||||
<main-class>eu.dnetlib.dhp.common.collection.DownloadCSV</main-class>
|
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV</main-class>
|
||||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--fileURL</arg><arg>${doajFileURL}</arg>
|
<arg>--fileURL</arg><arg>${doajFileURL}</arg>
|
||||||
<arg>--workingPath</arg><arg>${workingDir}/doaj</arg>
|
<arg>--workingPath</arg><arg>${workingDir}/doaj</arg>
|
||||||
|
|
Loading…
Reference in New Issue