1
0
Fork 0

removed generation of EBI links from old dump, now EBI link dump is created by another wf

This commit is contained in:
Sandro La Bruzzo 2021-10-05 10:21:33 +02:00
parent 60a6a9a583
commit 991b06bd0b
2 changed files with 1 additions and 6 deletions

View File

@ -32,14 +32,9 @@ object SparkEBILinksToOaf {
import spark.implicits._
implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
val ebi_rdd:Dataset[EBILinkItem] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => BioDBToOAF.extractEBILinksFromDump(s))).as[EBILinkItem]
ebi_rdd.write.mode(SaveMode.Overwrite).save(s"${sourcePath}_dataset")
val ebLinks:Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links!= null)
ebLinks.flatMap(j =>BioDBToOAF.parse_ebi_links(j.links))
.repartition(4000)
.filter(p => BioDBToOAF.EBITargetLinksFilter(p))
.flatMap(p => BioDBToOAF.convertEBILinksToOaf(p))
.write.mode(SaveMode.Overwrite).save(targetPath)

View File

@ -74,7 +74,7 @@
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Create Baselnie DataSet</name>
<name>Create Baseline DataSet</name>
<class>eu.dnetlib.dhp.sx.ebi.SparkAddLinkUpdates</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>