forked from D-Net/dnet-hadoop
Removed generation of EBI links from the old dump; the EBI link dump is now created by another workflow.
This commit is contained in:
parent
60a6a9a583
commit
991b06bd0b
|
@ -32,14 +32,9 @@ object SparkEBILinksToOaf {
|
||||||
import spark.implicits._
|
import spark.implicits._
|
||||||
implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
|
implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
|
||||||
|
|
||||||
val ebi_rdd:Dataset[EBILinkItem] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => BioDBToOAF.extractEBILinksFromDump(s))).as[EBILinkItem]
|
|
||||||
|
|
||||||
ebi_rdd.write.mode(SaveMode.Overwrite).save(s"${sourcePath}_dataset")
|
|
||||||
|
|
||||||
val ebLinks:Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links!= null)
|
val ebLinks:Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links!= null)
|
||||||
|
|
||||||
ebLinks.flatMap(j =>BioDBToOAF.parse_ebi_links(j.links))
|
ebLinks.flatMap(j =>BioDBToOAF.parse_ebi_links(j.links))
|
||||||
.repartition(4000)
|
|
||||||
.filter(p => BioDBToOAF.EBITargetLinksFilter(p))
|
.filter(p => BioDBToOAF.EBITargetLinksFilter(p))
|
||||||
.flatMap(p => BioDBToOAF.convertEBILinksToOaf(p))
|
.flatMap(p => BioDBToOAF.convertEBILinksToOaf(p))
|
||||||
.write.mode(SaveMode.Overwrite).save(targetPath)
|
.write.mode(SaveMode.Overwrite).save(targetPath)
|
||||||
|
|
|
@ -74,7 +74,7 @@
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn-cluster</master>
|
<master>yarn-cluster</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Create Baselnie DataSet</name>
|
<name>Create Baseline DataSet</name>
|
||||||
|
|
||||||
<class>eu.dnetlib.dhp.sx.ebi.SparkAddLinkUpdates</class>
|
<class>eu.dnetlib.dhp.sx.ebi.SparkAddLinkUpdates</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
|
Loading…
Reference in New Issue