forked from D-Net/dnet-hadoop
refactoring
This commit is contained in:
parent
63d74ee379
commit
9f9dd00b94
|
@@ -44,7 +44,7 @@ object GenerateCrossrefDataset {
|
|||
val targetPath = parser.get("targetPath")
|
||||
|
||||
val spark: SparkSession = SparkSession.builder().config(conf)
|
||||
- .appName(UnpackCrossrefDumpEntries.getClass.getSimpleName)
|
||||
+ .appName(GenerateCrossrefDataset.getClass.getSimpleName)
|
||||
.master(master)
|
||||
.getOrCreate()
|
||||
val sc: SparkContext = spark.sparkContext
|
||||
|
@@ -61,7 +61,6 @@ object GenerateCrossrefDataset {
|
|||
}
|
||||
|
||||
|
||||
// sc.textFile(sourcePath,6000)
|
||||
sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2))
|
||||
.map(meta => crossrefElement(meta))
|
||||
.toDS()//.as[CrossrefDT]
|
||||
|
|
Loading…
Reference in New Issue