Added utility to write time logs during the deduplication phase

This commit is contained in:
Sandro La Bruzzo 2023-06-28 11:20:09 +02:00
parent 88a1cbc37d
commit b195da3a83
2 changed files with 19 additions and 0 deletions

View File

@ -0,0 +1,3 @@
package eu.dnetlib.dhp.application.dedup.log
/** One timing record for a deduplication run.
  *
  * Field meanings below are inferred from the names only; nothing in this file
  * populates or interprets them, so confirm against the code that builds the record.
  *
  * @param tag           free-form label for the record (presumably the step or phase name — confirm)
  * @param configuration configuration associated with the run (format not shown here — confirm)
  * @param entity        entity the measurement refers to (presumably the entity type — confirm)
  * @param startTS       start timestamp (unit not shown here; presumably epoch milliseconds — confirm)
  * @param endTS         end timestamp (same unit as `startTS`, presumably — confirm)
  * @param totalMs       elapsed time in milliseconds, going by the name (presumably `endTS - startTS` — confirm)
  */
case class DedupLogModel(
  tag: String,
  configuration: String,
  entity: String,
  startTS: Long,
  endTS: Long,
  totalMs: Long
)

View File

@ -0,0 +1,16 @@
package eu.dnetlib.dhp.application.dedup.log
import org.apache.spark.sql.{SaveMode, SparkSession}
/** Appends [[DedupLogModel]] records to a Spark output location.
  *
  * @param path destination passed to `DataFrameWriter.save`; every call to
  *             [[appendLog]] writes to this same location
  */
class DedupLogWriter(path: String) {

  /** Writes a single log record to `path` in append mode.
    *
    * The record is wrapped in a one-element Dataset and saved with
    * `SaveMode.Append`. No output format is set explicitly, so whichever
    * default data source the session is configured with is used.
    *
    * @param dedupLogModel the record to persist
    * @param spark         session used to build the Dataset and supply the encoder
    */
  def appendLog(dedupLogModel: DedupLogModel, spark: SparkSession): Unit = {
    // Brings the implicit Encoder[DedupLogModel] into scope for createDataset.
    import spark.implicits._

    spark
      .createDataset[DedupLogModel](Seq(dedupLogModel))
      .write
      .mode(SaveMode.Append)
      .save(path)
  }
}