Removed generation of EBI links from the old dump; the EBI link dump is now created by another workflow.
This commit is contained in:
parent
60a6a9a583
commit
991b06bd0b
|
@ -32,14 +32,9 @@ object SparkEBILinksToOaf {
|
|||
import spark.implicits._
|
||||
implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
|
||||
|
||||
val ebi_rdd:Dataset[EBILinkItem] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => BioDBToOAF.extractEBILinksFromDump(s))).as[EBILinkItem]
|
||||
|
||||
ebi_rdd.write.mode(SaveMode.Overwrite).save(s"${sourcePath}_dataset")
|
||||
|
||||
val ebLinks:Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links!= null)
|
||||
|
||||
ebLinks.flatMap(j =>BioDBToOAF.parse_ebi_links(j.links))
|
||||
.repartition(4000)
|
||||
.filter(p => BioDBToOAF.EBITargetLinksFilter(p))
|
||||
.flatMap(p => BioDBToOAF.convertEBILinksToOaf(p))
|
||||
.write.mode(SaveMode.Overwrite).save(targetPath)
|
||||
|
|
|
@ -74,7 +74,7 @@
|
|||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Create Baselnie DataSet</name>
|
||||
<name>Create Baseline DataSet</name>
|
||||
|
||||
<class>eu.dnetlib.dhp.sx.ebi.SparkAddLinkUpdates</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
|
|
Loading…
Reference in New Issue