forked from D-Net/dnet-hadoop
updated workflow for generation of Scholix Datasources to use MDStore transactions
parent 9d39845d1f
commit 37e36baf76
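This commit wraps the two Scholix generation workflows (the bio-entity transformation and the EBI links job) in MDStore transactions: a StartTransaction Java action asks the MDStore manager for a NEW_VERSION, the Spark job writes into that version's HDFS path instead of a fixed targetPath, and a final action COMMITs the version on success or ROLLBACKs it on failure. A minimal Scala sketch of that control flow; requestNewVersion, commit, and rollback are hypothetical stand-ins for the manager calls that eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode actually performs:

// Sketch only: requestNewVersion/commit/rollback are hypothetical stand-ins for the
// MDStore manager REST calls performed by MDStoreActionNode.
object MdStoreTransactionSketch {

  final case class MdStoreVersion(id: String, hdfsPath: String)

  def requestNewVersion(managerUri: String, mdStoreId: String): MdStoreVersion =
    MdStoreVersion(s"$mdStoreId-v1", s"/data/mdstore/$mdStoreId/v1") // NEW_VERSION (stubbed)

  def commit(managerUri: String, v: MdStoreVersion): Unit = println(s"COMMIT ${v.id}")     // stubbed
  def rollback(managerUri: String, v: MdStoreVersion): Unit = println(s"ROLLBACK ${v.id}") // stubbed

  // Mirrors the workflow graph: StartTransaction -> Spark write -> CommitVersion | RollBack
  def runInMdStoreTransaction(mdStoreId: String, managerUri: String)(write: String => Unit): Unit = {
    val version = requestNewVersion(managerUri, mdStoreId)
    try {
      write(version.hdfsPath) // the Spark action writes under the version's base path
      commit(managerUri, version)
    } catch {
      case e: Exception =>
        rollback(managerUri, version)
        throw e
    }
  }

  def main(args: Array[String]): Unit =
    runInMdStoreTransaction("md-123", "http://mdstore-manager/api")(p => println(s"writing to $p"))
}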
@@ -1,4 +1,4 @@
 <workflow-app name="Transform_BioEntity_Workflow" xmlns="uri:oozie:workflow:0.5">
     <parameters>
         <property>
             <name>sourcePath</name>
@@ -8,19 +8,40 @@
             <name>database</name>
             <description>the PDB Database Working Path</description>
         </property>

         <property>
-            <name>targetPath</name>
-            <description>the Target Working dir path</description>
+            <name>mdStoreOutputId</name>
+            <description>the identifier of the cleaned MDStore</description>
+        </property>
+        <property>
+            <name>mdStoreManagerURI</name>
+            <description>the path of the cleaned mdstore</description>
         </property>
     </parameters>

-    <start to="ConvertDB"/>
+    <start to="StartTransaction"/>


     <kill name="Kill">
         <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
     </kill>

+    <action name="StartTransaction">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>NEW_VERSION</arg>
+            <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+            <capture-output/>
+        </java>
+        <ok to="ConvertDB"/>
+        <error to="RollBack"/>
+    </action>
     <action name="ConvertDB">
         <spark xmlns="uri:oozie:spark-action:0.2">
             <master>yarn</master>
@@ -41,11 +62,48 @@
             <arg>--master</arg><arg>yarn</arg>
             <arg>--dbPath</arg><arg>${sourcePath}</arg>
             <arg>--database</arg><arg>${database}</arg>
-            <arg>--targetPath</arg><arg>${targetPath}</arg>
+            <arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
         </spark>
-        <ok to="End"/>
-        <error to="Kill"/>
+        <ok to="CommitVersion"/>
+        <error to="RollBack"/>

     </action>
-    <end name="End"/>
+    <action name="CommitVersion">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>COMMIT</arg>
+            <arg>--namenode</arg><arg>${nameNode}</arg>
+            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+        </java>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="RollBack">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>ROLLBACK</arg>
+            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+        </java>
+        <ok to="Kill"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>

 </workflow-app>
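The <capture-output/> element above is what makes the handoff work: StartTransaction emits the newly created version as a JSON string among its output properties, the workflow forwards it to Spark through ${wf:actionData('StartTransaction')['mdStoreVersion']}, and the Spark job deserializes it back into an MDStoreVersion bean (see the Scala changes below). A self-contained sketch of that parsing step; MDStoreVersionStub is an assumed, reduced shape of the real eu.dnetlib.dhp.schema.mdstore.MDStoreVersion, which carries more fields than the HDFS path used here:

import com.fasterxml.jackson.databind.ObjectMapper

// Stub with the one property these jobs read; the "hdfsPath" field name is an assumption.
class MDStoreVersionStub {
  private var hdfsPath: String = _
  def getHdfsPath: String = hdfsPath
  def setHdfsPath(p: String): Unit = { hdfsPath = p }
}

object ParseVersionExample {
  def main(args: Array[String]): Unit = {
    // Example payload as it might arrive via ${wf:actionData('StartTransaction')['mdStoreVersion']}
    val mdstoreOutputVersion = """{"hdfsPath":"/data/mdstore/md-123/v2"}"""
    val version = new ObjectMapper().readValue(mdstoreOutputVersion, classOf[MDStoreVersionStub])
    println(version.getHdfsPath) // base path the Spark job writes under
  }
}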
@@ -2,5 +2,5 @@
     {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
     {"paramName":"db", "paramLongName":"database", "paramDescription": "should be PDB or UNIPROT", "paramRequired": true},
     {"paramName":"p", "paramLongName":"dbPath", "paramDescription": "the path of the database to transform", "paramRequired": true},
-    {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the OAF target path ", "paramRequired": true}
+    {"paramName":"mo", "paramLongName":"mdstoreOutputVersion", "paramDescription": "the oaf path ", "paramRequired": true}
 ]
@@ -1,5 +1,20 @@
 [
-    {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
-    {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "the source Path", "paramRequired": true},
-    {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the oaf path ", "paramRequired": true}
+    {
+        "paramName": "mt",
+        "paramLongName": "master",
+        "paramDescription": "should be local or yarn",
+        "paramRequired": true
+    },
+    {
+        "paramName": "s",
+        "paramLongName": "sourcePath",
+        "paramDescription": "the source Path",
+        "paramRequired": true
+    },
+    {
+        "paramName": "mo",
+        "paramLongName": "mdstoreOutputVersion",
+        "paramDescription": "the oaf path ",
+        "paramRequired": true
+    }
 ]
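Both parameter specs now declare mdstoreOutputVersion (short flag "mo") in place of targetPath, so the jobs receive the serialized MDStoreVersion rather than a bare output directory. A quick way to sanity-check such a spec after hand-editing, sketched with Jackson (already a dependency of this stack):

import com.fasterxml.jackson.databind.ObjectMapper

object ParamSpecCheck {
  def main(args: Array[String]): Unit = {
    val spec =
      """[
        |  {"paramName":"mo", "paramLongName":"mdstoreOutputVersion", "paramDescription": "the oaf path ", "paramRequired": true}
        |]""".stripMargin
    val node = new ObjectMapper().readTree(spec) // throws on malformed JSON
    require(node.isArray, "parameter spec must be a JSON array")
    println(s"ok: ${node.size()} parameter definition(s)")
  }
}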
@@ -9,34 +9,26 @@
             <description>the Working Path</description>
         </property>
         <property>
-            <name>targetPath</name>
-            <description>the OAF MDStore Path</description>
+            <name>mdStoreOutputId</name>
+            <description>the identifier of the cleaned MDStore</description>
         </property>
         <property>
-            <name>sparkDriverMemory</name>
-            <description>memory for driver process</description>
-        </property>
-        <property>
-            <name>sparkExecutorMemory</name>
-            <description>memory for individual executor</description>
-        </property>
-        <property>
-            <name>sparkExecutorCores</name>
-            <description>number of cores used by single executor</description>
+            <name>mdStoreManagerURI</name>
+            <description>the path of the cleaned mdstore</description>
         </property>
         <property>
             <name>resumeFrom</name>
-            <value>DownloadEBILinks</value>
+            <value>CreateEBIDataSet</value>
             <description>node to start</description>
         </property>
     </parameters>

-    <start to="resume_from"/>
+    <start to="StartTransaction"/>

     <decision name="resume_from">
         <switch>
             <case to="DownloadEBILinks">${wf:conf('resumeFrom') eq 'DownloadEBILinks'}</case>
-            <case to="CreateEBIDataSet">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
+            <case to="StartTransaction">${wf:conf('resumeFrom') eq 'CreateEBIDataSet'}</case>
             <default to="DownloadEBILinks"/>
         </switch>
     </decision>
@@ -77,9 +69,29 @@
             <move source="${sourcePath}/ebi_links_dataset" target="${sourcePath}/ebi_links_dataset_old"/>
             <move source="${workingPath}/links_final" target="${sourcePath}/ebi_links_dataset"/>
         </fs>
-        <ok to="CreateEBIDataSet"/>
+        <ok to="StartTransaction"/>
         <error to="Kill"/>
     </action>

+    <action name="StartTransaction">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>NEW_VERSION</arg>
+            <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+            <capture-output/>
+        </java>
+        <ok to="CreateEBIDataSet"/>
+        <error to="RollBack"/>
+    </action>
+
     <action name="CreateEBIDataSet">
         <spark xmlns="uri:oozie:spark-action:0.2">
             <master>yarn-cluster</master>
@@ -95,11 +107,49 @@
             ${sparkExtraOPT}
         </spark-opts>
         <arg>--sourcePath</arg><arg>${sourcePath}/ebi_links_dataset</arg>
-        <arg>--targetPath</arg><arg>${targetPath}</arg>
+        <arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
         <arg>--master</arg><arg>yarn</arg>
         </spark>
         <ok to="End"/>
         <error to="Kill"/>
     </action>


+    <action name="CommitVersion">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>COMMIT</arg>
+            <arg>--namenode</arg><arg>${nameNode}</arg>
+            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+        </java>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="RollBack">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
+            <arg>--action</arg><arg>ROLLBACK</arg>
+            <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
+            <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
+        </java>
+        <ok to="Kill"/>
+        <error to="Kill"/>
+    </action>

     <end name="End"/>

 </workflow-app>
@@ -2,13 +2,15 @@ package eu.dnetlib.dhp.sx.bio

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.collection.CollectionUtils
+import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH,MDSTORE_SIZE_PATH}
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
 import eu.dnetlib.dhp.schema.oaf.Oaf
 import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
+import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}

 object SparkTransformBioDatabaseToOAF {

   def main(args: Array[String]): Unit = {
@@ -25,8 +27,13 @@ object SparkTransformBioDatabaseToOAF {

     val dbPath: String = parser.get("dbPath")
     log.info("dbPath: {}", database)
-    val targetPath: String = parser.get("targetPath")
-    log.info("targetPath: {}", database)
+
+    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
+    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
+
+    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
+    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
+    log.info("outputBasePath: {}", outputBasePath)

     val spark: SparkSession =
       SparkSession
@@ -43,24 +50,28 @@ object SparkTransformBioDatabaseToOAF {
       case "UNIPROT" =>
         CollectionUtils.saveDataset(
           spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))),
-          targetPath
+          s"$outputBasePath/$MDSTORE_DATA_PATH"
         )
       case "PDB" =>
         CollectionUtils.saveDataset(
           spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))),
-          targetPath
+          s"$outputBasePath/$MDSTORE_DATA_PATH"
         )
       case "SCHOLIX" =>
         CollectionUtils.saveDataset(
           spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)),
-          targetPath
+          s"$outputBasePath/$MDSTORE_DATA_PATH"
         )
       case "CROSSREF_LINKS" =>
         CollectionUtils.saveDataset(
           spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))),
-          targetPath
+          s"$outputBasePath/$MDSTORE_DATA_PATH"
         )
     }
+
+    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
+    val mdStoreSize = df.count
+    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
   }

 }
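After saving the dataset under MDSTORE_DATA_PATH, the job re-reads it, counts the records, and persists the count to MDSTORE_SIZE_PATH so the manager knows the size of the version it is about to commit. writeHdfsFile is imported from DHPUtils; the following is a plausible equivalent using the standard Hadoop FileSystem API, an assumption about the helper's behavior rather than its actual source:

import java.nio.charset.StandardCharsets
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object WriteHdfsFileSketch {
  // Assumed behavior of DHPUtils.writeHdfsFile: write a small string to an HDFS path,
  // overwriting any existing file.
  def writeHdfsFile(conf: Configuration, content: String, path: String): Unit = {
    val fs = FileSystem.get(conf)
    val out = fs.create(new Path(path), true) // overwrite = true
    try out.write(content.getBytes(StandardCharsets.UTF_8))
    finally out.close()
  }

  def main(args: Array[String]): Unit =
    writeHdfsFile(new Configuration(), "42", "/tmp/mdstore/v1/size")
}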
@@ -9,6 +9,9 @@ import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql._
 import org.slf4j.{Logger, LoggerFactory}
+import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
+import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}

 object SparkEBILinksToOaf {

@@ -32,8 +35,13 @@ object SparkEBILinksToOaf {
     import spark.implicits._
     val sourcePath = parser.get("sourcePath")
     log.info(s"sourcePath -> $sourcePath")
-    val targetPath = parser.get("targetPath")
-    log.info(s"targetPath -> $targetPath")
+    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
+    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
+
+    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
+    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
+    log.info("outputBasePath: {}", outputBasePath)

     implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])

     val ebLinks: Dataset[EBILinkItem] = spark.read
@@ -46,7 +54,10 @@ object SparkEBILinksToOaf {
         .flatMap(j => BioDBToOAF.parse_ebi_links(j.links))
         .filter(p => BioDBToOAF.EBITargetLinksFilter(p))
         .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)),
-      targetPath
+      s"$outputBasePath/$MDSTORE_DATA_PATH"
     )
+
+    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
+    val mdStoreSize = df.count
+    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
   }
 }
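SparkEBILinksToOaf ends with the same size-recording step as the bio-entity job. Because the output is read back with spark.read.text, the count is a line count, which equals the record count only if the saved MDStore data is line-oriented (one serialized record per line), as assumed here. A standalone sketch of the step; the "store" leaf name stands in for MDSTORE_DATA_PATH and is an assumption:

import org.apache.spark.sql.SparkSession

object RecordSizeStep {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("mdstore-size").getOrCreate()
    val outputBasePath = "/tmp/mdstore/md-123/v2" // stand-in for cleanedMdStoreVersion.getHdfsPath
    // Line count of the saved data; equals the record count for line-oriented output.
    val mdStoreSize = spark.read.text(s"$outputBasePath/store").count()
    println(s"records: $mdStoreSize") // the real jobs persist this via writeHdfsFile to .../size
    spark.stop()
  }
}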