Removed generation of EBI links from the old dump; the EBI link dump is now created by another workflow.

beta
Sandro La Bruzzo 3 years ago
parent 60a6a9a583
commit 991b06bd0b

@ -32,14 +32,9 @@ object SparkEBILinksToOaf {
import spark.implicits._
implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
val ebi_rdd:Dataset[EBILinkItem] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => BioDBToOAF.extractEBILinksFromDump(s))).as[EBILinkItem]
ebi_rdd.write.mode(SaveMode.Overwrite).save(s"${sourcePath}_dataset")
val ebLinks:Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links!= null)
ebLinks.flatMap(j =>BioDBToOAF.parse_ebi_links(j.links))
.repartition(4000)
.filter(p => BioDBToOAF.EBITargetLinksFilter(p))
.flatMap(p => BioDBToOAF.convertEBILinksToOaf(p))
.write.mode(SaveMode.Overwrite).save(targetPath)

@ -74,7 +74,7 @@
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Create Baselnie DataSet</name>
<name>Create Baseline DataSet</name>
<class>eu.dnetlib.dhp.sx.ebi.SparkAddLinkUpdates</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>

Loading…
Cancel
Save