1
0
Fork 0

refactoring

This commit is contained in:
Miriam Baglioni 2021-06-15 09:24:46 +02:00
parent 63d74ee379
commit 9f9dd00b94
1 changed file with 1 addition and 2 deletions

View File

@@ -44,7 +44,7 @@ object GenerateCrossrefDataset {
val targetPath = parser.get("targetPath") val targetPath = parser.get("targetPath")
val spark: SparkSession = SparkSession.builder().config(conf) val spark: SparkSession = SparkSession.builder().config(conf)
.appName(UnpackCrossrefDumpEntries.getClass.getSimpleName) .appName(GenerateCrossrefDataset.getClass.getSimpleName)
.master(master) .master(master)
.getOrCreate() .getOrCreate()
val sc: SparkContext = spark.sparkContext val sc: SparkContext = spark.sparkContext
@@ -61,7 +61,6 @@ object GenerateCrossrefDataset {
} }
// sc.textFile(sourcePath,6000)
sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2)) sc.wholeTextFiles(sourcePath,6000).flatMap(d =>extractDump(d._2))
.map(meta => crossrefElement(meta)) .map(meta => crossrefElement(meta))
.toDS()//.as[CrossrefDT] .toDS()//.as[CrossrefDT]