Modification of Microsoft Academic Graph Mapping #435
|
@ -79,23 +79,6 @@ object MagUtility extends Serializable {
|
||||||
private val MAGCollectedFrom = keyValue(ModelConstants.MAG_ID, ModelConstants.MAG_NAME)
|
private val MAGCollectedFrom = keyValue(ModelConstants.MAG_ID, ModelConstants.MAG_NAME)
|
||||||
|
|
||||||
private val MAGDataInfo: DataInfo = {
|
private val MAGDataInfo: DataInfo = {
|
||||||
val di = new DataInfo
|
|
||||||
di.setDeletedbyinference(false)
|
|
||||||
di.setInferred(false)
|
|
||||||
di.setInvisible(false)
|
|
||||||
di.setTrust("0.9")
|
|
||||||
di.setProvenanceaction(
|
|
||||||
OafMapperUtils.qualifier(
|
|
||||||
ModelConstants.SYSIMPORT_ACTIONSET,
|
|
||||||
ModelConstants.SYSIMPORT_ACTIONSET,
|
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS
|
|
||||||
)
|
|
||||||
)
|
|
||||||
di
|
|
||||||
}
|
|
||||||
|
|
||||||
private val MAGDataInfoInvisible: DataInfo = {
|
|
||||||
val di = new DataInfo
|
val di = new DataInfo
|
||||||
di.setDeletedbyinference(false)
|
di.setDeletedbyinference(false)
|
||||||
di.setInferred(false)
|
di.setInferred(false)
|
||||||
|
@ -111,8 +94,7 @@ object MagUtility extends Serializable {
|
||||||
)
|
)
|
||||||
di
|
di
|
||||||
}
|
}
|
||||||
|
val datatypedict = Map(
|
||||||
val datatypedict = Map(
|
|
||||||
"bool" -> BooleanType,
|
"bool" -> BooleanType,
|
||||||
"int" -> IntegerType,
|
"int" -> IntegerType,
|
||||||
"uint" -> IntegerType,
|
"uint" -> IntegerType,
|
||||||
|
@ -453,7 +435,6 @@ object MagUtility extends Serializable {
|
||||||
|
|
||||||
case "repository" =>
|
case "repository" =>
|
||||||
result = new Publication()
|
result = new Publication()
|
||||||
result.setDataInfo(MAGDataInfoInvisible)
|
|
||||||
qualifier(
|
qualifier(
|
||||||
"0038",
|
"0038",
|
||||||
"Other literature type",
|
"Other literature type",
|
||||||
|
@ -488,7 +469,6 @@ object MagUtility extends Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result != null) {
|
if (result != null) {
|
||||||
if (result.getDataInfo == null)
|
|
||||||
result.setDataInfo(MAGDataInfo)
|
result.setDataInfo(MAGDataInfo)
|
||||||
val i = new Instance
|
val i = new Instance
|
||||||
i.setInstancetype(tp)
|
i.setInstancetype(tp)
|
||||||
|
@ -512,7 +492,7 @@ object MagUtility extends Serializable {
|
||||||
return null
|
return null
|
||||||
|
|
||||||
result.setCollectedfrom(List(MAGCollectedFrom).asJava)
|
result.setCollectedfrom(List(MAGCollectedFrom).asJava)
|
||||||
val pidList = List(
|
var pidList = List(
|
||||||
structuredProperty(
|
structuredProperty(
|
||||||
paper.paperId.get.toString,
|
paper.paperId.get.toString,
|
||||||
qualifier(
|
qualifier(
|
||||||
|
@ -525,7 +505,7 @@ object MagUtility extends Serializable {
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
result.setPid(pidList.asJava)
|
|
||||||
|
|
||||||
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
||||||
|
|
||||||
|
@ -618,10 +598,9 @@ object MagUtility extends Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
val instance = result.getInstance().get(0)
|
val instance = result.getInstance().get(0)
|
||||||
instance.setPid(pidList.asJava)
|
|
||||||
if (paper.doi.orNull != null)
|
if (paper.doi.orNull != null) {
|
||||||
instance.setAlternateIdentifier(
|
pidList = pidList ::: List(
|
||||||
List(
|
|
||||||
structuredProperty(
|
structuredProperty(
|
||||||
paper.doi.get,
|
paper.doi.get,
|
||||||
qualifier(
|
qualifier(
|
||||||
|
@ -632,8 +611,10 @@ object MagUtility extends Serializable {
|
||||||
),
|
),
|
||||||
null
|
null
|
||||||
)
|
)
|
||||||
).asJava
|
|
||||||
)
|
)
|
||||||
|
}
|
||||||
|
instance.setPid(pidList.asJava)
|
||||||
|
result.setPid(pidList.asJava)
|
||||||
instance.setUrl(paper.urls.get.asJava)
|
instance.setUrl(paper.urls.get.asJava)
|
||||||
instance.setHostedby(ModelConstants.UNKNOWN_REPOSITORY)
|
instance.setHostedby(ModelConstants.UNKNOWN_REPOSITORY)
|
||||||
instance.setCollectedfrom(MAGCollectedFrom)
|
instance.setCollectedfrom(MAGCollectedFrom)
|
||||||
|
|
|
@ -35,9 +35,12 @@ class SparkMAGtoOAF(propertyPath: String, args: Array[String], log: Logger)
|
||||||
def convertMAG(spark: SparkSession, magBasePath: String, mdStorePath: String): Unit = {
|
def convertMAG(spark: SparkSession, magBasePath: String, mdStorePath: String): Unit = {
|
||||||
import spark.implicits._
|
import spark.implicits._
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
spark.read
|
spark.read
|
||||||
.load(s"$magBasePath/mag_denormalized")
|
.load(s"$magBasePath/mag_denormalized")
|
||||||
.as[MAGPaper]
|
.as[MAGPaper]
|
||||||
|
.filter(col("doi").isNotNull)
|
||||||
.map(s => MagUtility.convertMAGtoOAF(s))
|
.map(s => MagUtility.convertMAGtoOAF(s))
|
||||||
.filter(s => s != null)
|
.filter(s => s != null)
|
||||||
.write
|
.write
|
||||||
|
|
|
@ -3,13 +3,17 @@ package eu.dnetlib.dhp.collection.mag
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Dataset, Publication, Result}
|
import eu.dnetlib.dhp.schema.oaf.{Dataset, Publication, Result}
|
||||||
import org.apache.spark.sql.SparkSession
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.apache.spark.sql.functions.col
|
||||||
import org.junit.jupiter.api.Assertions._
|
import org.junit.jupiter.api.Assertions._
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.Test
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class MAGMappingTest {
|
class MAGMappingTest {
|
||||||
|
|
||||||
val mapper = new ObjectMapper()
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
|
|
||||||
def mappingTest(): Unit = {
|
def mappingTest(): Unit = {
|
||||||
|
|
||||||
val spark = SparkSession
|
val spark = SparkSession
|
||||||
|
@ -18,12 +22,12 @@ class MAGMappingTest {
|
||||||
.master("local[*]")
|
.master("local[*]")
|
||||||
.getOrCreate()
|
.getOrCreate()
|
||||||
|
|
||||||
val s = new SparkMagOrganizationAS(null, null, null)
|
val s = new SparkMAGtoOAF(null, null, null)
|
||||||
|
s.convertMAG(spark, "/Users/sandro/Downloads/", "/Users/sandro/Downloads/mag_OAF")
|
||||||
s.generateAS(spark, "/home/sandro/Downloads/mag_test", "/home/sandro/Downloads/mag_AS")
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def mappingMagType(): Unit = {
|
def mappingMagType(): Unit = {
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue