update mag mapping
parent
d34cef3f8d
commit
cbd4e5e4bb
@ -0,0 +1,21 @@
|
||||
[
|
||||
{
|
||||
"paramName": "m",
|
||||
"paramLongName": "master",
|
||||
"paramDescription": "the master name",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "md",
|
||||
"paramLongName": "mdstorePath",
|
||||
"paramDescription": "The base path of MAG DUMP CSV Tables",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "w",
|
||||
"paramLongName": "workingPath",
|
||||
"paramDescription": "The working path",
|
||||
"paramRequired": false
|
||||
}
|
||||
|
||||
]
|
@ -0,0 +1,21 @@
|
||||
[
|
||||
{
|
||||
"paramName": "m",
|
||||
"paramLongName": "master",
|
||||
"paramDescription": "the master name",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "mp",
|
||||
"paramLongName": "magBasePath",
|
||||
"paramDescription": "The base path of MAG DUMP CSV Tables",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "w",
|
||||
"paramLongName": "workingPath",
|
||||
"paramDescription": "The working path",
|
||||
"paramRequired": false
|
||||
}
|
||||
|
||||
]
|
@ -0,0 +1,38 @@
|
||||
package eu.dnetlib.dhp.collection.mag
|
||||
|
||||
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||
import org.apache.spark.sql.SparkSession
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
/** Spark application that loads the MAG papers stored under the working path and logs
  * how many of them were found.
  *
  * `parser` and `spark` are not defined in this class; they are presumably supplied by
  * [[AbstractScalaApplication]] once it has been initialised — TODO confirm.
  *
  * @param propertyPath path of the argument-definition file, forwarded to the parent class
  * @param args         command-line arguments, forwarded to the parent class
  * @param log          logger used both by the parent class and by this job
  */
class SparkMAGtoOAF(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Reads the `mdstorePath` and `workingPath` arguments, logs them and then
    * delegates to [[convertMAG]].
    */
  override def run(): Unit = {
    val mdstorePath: String = parser.get("mdstorePath")
    log.info("found parameters mdstorePath: {}", mdstorePath)
    val workingPath: String = parser.get("workingPath")
    log.info("found parameters workingPath: {}", workingPath)
    convertMAG(spark, workingPath, mdstorePath)
  }

  /** Loads the dataset located at `<workingPath>/mag` as [[MAGPaper]] records and logs
    * the number of records it contains.
    *
    * @param spark       session used to read the dataset
    * @param workingPath base directory that contains the `mag` dataset; must not be null
    * @param mdStorePath currently unused by this method; kept so existing callers keep compiling
    * @throws IllegalArgumentException if `workingPath` is null
    */
  def convertMAG(spark: SparkSession, workingPath: String, mdStorePath: String): Unit = {
    // workingPath is interpolated into the input location below. Without this guard a
    // missing value would silently turn into the literal path "null/mag" and only fail
    // later with a misleading "path does not exist" error.
    require(
      workingPath != null,
      "workingPath must be provided: the MAG papers are read from <workingPath>/mag"
    )
    import spark.implicits._
    val papers = spark.read.load(s"$workingPath/mag").as[MAGPaper]
    val total = papers.count()
    log.info(s"TOTAL PAPERS: $total")
  }
}
|
||||
|
||||
/** Companion object hosting the logger and the JVM entry point of the job. */
object SparkMAGtoOAF {

  /** Logger shared with every application instance created by [[main]]. */
  val log: Logger = LoggerFactory.getLogger(getClass)

  // Location of the argument-definition file handed to the application constructor.
  private val PropertiesPath: String =
    "/eu/dnetlib/dhp/collection/mag/convert_MAG_to_OAF_properties.json"

  /** Builds the application from the command-line arguments, initialises it and runs it.
    *
    * @param args command-line arguments forwarded untouched to the application
    */
  def main(args: Array[String]): Unit = {
    val application = new SparkMAGtoOAF(PropertiesPath, args, log)
    val ready = application.initialize()
    ready.run()
  }
}
|
@ -0,0 +1,31 @@
|
||||
package eu.dnetlib.dhp.collection.mag
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import org.apache.spark.sql.SparkSession
|
||||
import org.apache.spark.sql.functions.col
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
/** Manual smoke test for the MAG to OAF mapping.
  *
  * It needs a MAG dataset on the local filesystem. The location defaults to the path the
  * test was originally written against and can be overridden with the `mag.test.path`
  * JVM system property (a local filesystem path). When no dataset exists at that
  * location the test is skipped instead of failing, so it does not break builds on
  * machines that do not have the data.
  */
class MAGMappingTest {

  val mapper = new ObjectMapper()

  /** Converts the first paper that has a non-null `journalId` and prints the result of
    * `MagUtility.convertMAGtoOAF` as pretty-printed JSON.
    */
  @Test
  def mappingTest(): Unit = {

    val magPath: String = sys.props.getOrElse("mag.test.path", "/home/sandro/Downloads/mag")

    // Skip (rather than fail) when the sample dataset is not available on this machine.
    org.junit.jupiter.api.Assumptions.assumeTrue(
      java.nio.file.Files.exists(java.nio.file.Paths.get(magPath)),
      s"MAG sample dataset not found at $magPath, skipping mapping test"
    )

    val spark = SparkSession
      .builder()
      .appName("Test")
      .master("local[*]")
      .getOrCreate()

    import spark.implicits._

    val magDS = spark.read.load(magPath).as[MAGPaper].where(col("journalId").isNotNull)

    val paper = magDS.first()

    print(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(MagUtility.convertMAGtoOAF(paper)))

  }

}
|
Loading…
Reference in New Issue