forked from D-Net/dnet-hadoop
Added utility to write time logs during the deduplication phase
This commit is contained in:
parent
88a1cbc37d
commit
b195da3a83
|
@ -0,0 +1,3 @@
|
|||
package eu.dnetlib.dhp.application.dedup.log
|
||||
|
||||
/**
 * One timing record produced while running the deduplication phase.
 *
 * The class is a plain immutable value holder; it performs no validation and
 * does not derive any field from the others.
 *
 * @param tag           free-form label for the record
 * @param configuration configuration the timed run refers to (format not
 *                      defined here; confirm against callers)
 * @param entity        entity the timed run refers to
 * @param startTS       start timestamp (presumably epoch milliseconds; confirm
 *                      against callers)
 * @param endTS         end timestamp, same unit as `startTS`
 * @param totalMs       elapsed time in milliseconds, as supplied by the caller
 */
case class DedupLogModel(
  tag: String,
  configuration: String,
  entity: String,
  startTS: Long,
  endTS: Long,
  totalMs: Long
)
|
|
@ -0,0 +1,16 @@
|
|||
package eu.dnetlib.dhp.application.dedup.log
|
||||
|
||||
import org.apache.spark.sql.{SaveMode, SparkSession}
|
||||
|
||||
/**
 * Appends deduplication timing records to a fixed output location.
 *
 * @param path destination passed unchanged to Spark's `DataFrameWriter.save`
 */
class DedupLogWriter(path: String) {

  /**
   * Writes a single log record to `path` in append mode.
   *
   * The record is wrapped in a one-element Dataset and saved with
   * `SaveMode.Append`. No output format is set explicitly, so the writer
   * uses whatever default data source the given session is configured with.
   *
   * @param dedupLogModel the record to persist
   * @param spark         session used to build the Dataset and perform the write
   */
  def appendLog(dedupLogModel: DedupLogModel, spark: SparkSession): Unit = {
    // Brings the implicit Encoder for the case class into scope.
    import spark.implicits._

    spark
      .createDataset(List(dedupLogModel))
      .write
      .mode(SaveMode.Append)
      .save(path)
  }
}
|
Loading…
Reference in New Issue