forked from D-Net/dnet-hadoop
refactoring
This commit is contained in:
parent
63d74ee379
commit
9f9dd00b94
|
@@ -44,7 +44,7 @@ object GenerateCrossrefDataset {
|
||||||
val targetPath = parser.get("targetPath")
|
val targetPath = parser.get("targetPath")
|
||||||
|
|
||||||
val spark: SparkSession = SparkSession.builder().config(conf)
|
val spark: SparkSession = SparkSession.builder().config(conf)
|
||||||
.appName(UnpackCrossrefDumpEntries.getClass.getSimpleName)
|
.appName(GenerateCrossrefDataset.getClass.getSimpleName)
|
||||||
.master(master)
|
.master(master)
|
||||||
.getOrCreate()
|
.getOrCreate()
|
||||||
val sc: SparkContext = spark.sparkContext
|
val sc: SparkContext = spark.sparkContext
|
||||||
|
@@ -61,7 +61,6 @@ object GenerateCrossrefDataset {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// sc.textFile(sourcePath,6000)
|
|
||||||
sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2))
|
sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2))
|
||||||
.map(meta => crossrefElement(meta))
|
.map(meta => crossrefElement(meta))
|
||||||
.toDS()//.as[CrossrefDT]
|
.toDS()//.as[CrossrefDT]
|
||||||
|
|
Loading…
Reference in New Issue