From b195da3a833cb0415c3fb40146eb7e46538dbafe Mon Sep 17 00:00:00 2001
From: Sandro La Bruzzo
Date: Wed, 28 Jun 2023 11:20:09 +0200
Subject: [PATCH] Added utility to write time logs during the deduplication phase

---
 .../application/dedup/log/DedupLogModel.scala | 15 +++++++++++++++
 .../application/dedup/log/DedupLogWriter.scala | 29 +++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogModel.scala
 create mode 100644 dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogWriter.scala

diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogModel.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogModel.scala
new file mode 100644
index 000000000..c1473e7df
--- /dev/null
+++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogModel.scala
@@ -0,0 +1,15 @@
+package eu.dnetlib.dhp.application.dedup.log
+
+/** A single timing record produced while a deduplication step runs.
+  *
+  * NOTE(review): the field names suggest start/end timestamps and an elapsed time in
+  * milliseconds, but nothing here fixes their units or how they are filled in; confirm with callers.
+  */
+case class DedupLogModel(
+  tag: String,
+  configuration: String,
+  entity: String,
+  startTS: Long,
+  endTS: Long,
+  totalMs: Long
+)
diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogWriter.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogWriter.scala
new file mode 100644
index 000000000..3060a13ae
--- /dev/null
+++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogWriter.scala
@@ -0,0 +1,29 @@
+package eu.dnetlib.dhp.application.dedup.log
+
+import org.apache.spark.sql.{SaveMode, SparkSession}
+
+/** Appends [[DedupLogModel]] records to a fixed output location.
+  *
+  * @param path destination handed to Spark's `DataFrameWriter.save` on every call
+  */
+class DedupLogWriter(path: String) {
+
+  /** Wraps the given record in a one-element Dataset and saves it to `path` in append mode.
+    *
+    * No output format is set here, so the session's default data source is used.
+    *
+    * @param dedupLogModel the record to persist
+    * @param spark         session used to build the Dataset and to supply its encoder
+    */
+  def appendLog(dedupLogModel: DedupLogModel, spark: SparkSession): Unit = {
+    import spark.implicits._
+
+    val singleEntry: Seq[DedupLogModel] = Seq(dedupLogModel)
+    spark
+      .createDataset(singleEntry)
+      .write
+      .mode(SaveMode.Append)
+      .save(path)
+  }
+
+}