OAF-store-graph mdstores: further fix for PR#180

This commit is contained in:
Claudio Atzori 2022-01-05 15:49:05 +01:00
parent 58f8998e3d
commit 908294d86e
1 changed file with 14 additions and 13 deletions

View File

@ -48,26 +48,27 @@ object CopyHdfsOafSparkApplication {
log.info("hdfsPath: {}", hdfsPath) log.info("hdfsPath: {}", hdfsPath)
implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
import spark.implicits._
val paths = DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala val paths = DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala
val validPaths: List[String] = paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList val validPaths: List[String] = paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList
val types = ModelSupport.oafTypes.entrySet
.asScala
.map(e => Tuple2(e.getKey, e.getValue))
if (validPaths.nonEmpty) { if (validPaths.nonEmpty) {
val oaf = spark.read.load(validPaths: _*).as[String] val oaf = spark.read.textFile(validPaths: _*)
val mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) val mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
val l = ModelSupport.oafTypes.entrySet.asScala.toList
l.foreach( types.foreach(t => oaf
e => .filter(o => isOafType(o, t._1))
oaf .map(j => mapper.readValue(j, t._2).asInstanceOf[Oaf])
.filter(o => isOafType(o, e.getKey)) .map(s => mapper.writeValueAsString(s))(Encoders.STRING)
.map(j => mapper.readValue(j, e.getValue).asInstanceOf[Oaf]) .write
.map(s => mapper.writeValueAsString(s))(Encoders.STRING) .option("compression", "gzip")
.write .mode(SaveMode.Append)
.option("compression", "gzip") .text(s"$hdfsPath/${t._1}")
.mode(SaveMode.Append)
.text(s"$hdfsPath/${e}")
) )
} }
} }