enrichment steps #38
|
@ -271,6 +271,26 @@ object DoiBoostMappingUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
  * Creates a [[StructuredProperty]] carrying `value`, with a qualifier built by
  * `createQualifier` from the supplied class and scheme identifiers and names.
  *
  * @param value      the value stored in the property
  * @param classId    class id passed to `createQualifier`
  * @param className  class name passed to `createQualifier`
  * @param schemeId   scheme id passed to `createQualifier`
  * @param schemeName scheme name passed to `createQualifier`
  * @return the newly created property
  */
def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String): StructuredProperty = {
  val property = new StructuredProperty
  val qualifier = createQualifier(classId, className, schemeId, schemeName)
  property.setQualifier(qualifier)
  property.setValue(value)
  property
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
  * Same as the five-argument `createSP`, and additionally attaches `dataInfo`
  * to the resulting [[StructuredProperty]].
  *
  * @param value      the value stored in the property
  * @param classId    class id used for the qualifier
  * @param className  class name used for the qualifier
  * @param schemeId   scheme id used for the qualifier
  * @param schemeName scheme name used for the qualifier
  * @param dataInfo   data info set on the property
  * @return the newly created property
  */
def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String, dataInfo: DataInfo): StructuredProperty = {
  // Delegate qualifier/value setup to the overload without DataInfo.
  val property = createSP(value, classId, className, schemeId, schemeName)
  property.setDataInfo(dataInfo)
  property
}
|
||||||
|
|
||||||
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
|
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
|
||||||
val sp = new StructuredProperty
|
val sp = new StructuredProperty
|
||||||
sp.setQualifier(createQualifier(classId, schemeId))
|
sp.setQualifier(createQualifier(classId, schemeId))
|
||||||
|
@ -279,6 +299,8 @@ object DoiBoostMappingUtil {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
|
def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
|
||||||
val sp = new StructuredProperty
|
val sp = new StructuredProperty
|
||||||
sp.setQualifier(createQualifier(classId, schemeId))
|
sp.setQualifier(createQualifier(classId, schemeId))
|
||||||
|
|
|
@ -129,16 +129,16 @@ case object ConversionUtil {
|
||||||
val fieldOfStudy = item._2
|
val fieldOfStudy = item._2
|
||||||
if (fieldOfStudy != null && fieldOfStudy.subjects != null && fieldOfStudy.subjects.nonEmpty) {
|
if (fieldOfStudy != null && fieldOfStudy.subjects != null && fieldOfStudy.subjects.nonEmpty) {
|
||||||
val p: List[StructuredProperty] = fieldOfStudy.subjects.flatMap(s => {
|
val p: List[StructuredProperty] = fieldOfStudy.subjects.flatMap(s => {
|
||||||
val s1 = createSP(s.DisplayName, "keyword", "dnet:subject_classification_typologies")
|
val s1 = createSP(s.DisplayName, "MAG","Microsoft Academic Graph classification", "dnet:subject_classification_typologies", "dnet:subject_classification_typologies")
|
||||||
val di = DoiBoostMappingUtil.generateDataInfo(s.Score.toString)
|
val di = DoiBoostMappingUtil.generateDataInfo(s.Score.toString)
|
||||||
var resList: List[StructuredProperty] = List(s1)
|
var resList: List[StructuredProperty] = List(s1)
|
||||||
if (s.MainType.isDefined) {
|
if (s.MainType.isDefined) {
|
||||||
val maintp = s.MainType.get
|
val maintp = s.MainType.get
|
||||||
val s2 = createSP(s.MainType.get, "keyword", "dnet:subject_classification_typologies")
|
val s2 = createSP(s.MainType.get, "MAG","Microsoft Academic Graph classification", "dnet:subject_classification_typologies", "dnet:subject_classification_typologies")
|
||||||
s2.setDataInfo(di)
|
s2.setDataInfo(di)
|
||||||
resList = resList ::: List(s2)
|
resList = resList ::: List(s2)
|
||||||
if (maintp.contains(".")) {
|
if (maintp.contains(".")) {
|
||||||
val s3 = createSP(maintp.split("\\.").head, "keyword", "dnet:subject_classification_typologies")
|
val s3 = createSP(maintp.split("\\.").head, "MAG","Microsoft Academic Graph classification", "dnet:subject_classification_typologies", "dnet:subject_classification_typologies")
|
||||||
s3.setDataInfo(di)
|
s3.setDataInfo(di)
|
||||||
resList = resList ::: List(s3)
|
resList = resList ::: List(s3)
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,7 +43,7 @@ object SparkPreProcessMAG {
|
||||||
val distinctPaper: Dataset[MagPapers] = spark.createDataset(result)
|
val distinctPaper: Dataset[MagPapers] = spark.createDataset(result)
|
||||||
distinctPaper.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/Papers_distinct")
|
distinctPaper.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/Papers_distinct")
|
||||||
|
|
||||||
logger.info("Phase 6) Enrich Publication with description")
|
logger.info("Phase 0) Enrich Publication with description")
|
||||||
val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
|
val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
|
||||||
pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract")
|
pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract")
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,7 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<start to="GenerateActionSet"/>
|
<start to="CreateDOIBoost"/>
|
||||||
|
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
|
|
|
@ -18,6 +18,9 @@ class CrossrefMappingTest {
|
||||||
val mapper = new ObjectMapper()
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testFunderRelationshipsMapping(): Unit = {
|
def testFunderRelationshipsMapping(): Unit = {
|
||||||
val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString
|
val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString
|
||||||
|
@ -58,6 +61,27 @@ class CrossrefMappingTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
  * Loads `orcid_data.json` from the test classpath, converts it with
  * `Crossref2Oaf.convert` and checks that at least one [[Oaf]] is produced.
  * The `Result` instances among the output are printed as JSON (with
  * `INDENT_OUTPUT` enabled) for manual inspection; they are not asserted on.
  */
@Test
def testOrcidID() :Unit = {
  // Fail with a clear assertion (instead of a late NPE) when the fixture is missing.
  val stream = getClass.getResourceAsStream("orcid_data.json")
  assertNotNull(stream)

  // Close the Source once it has been read so the underlying stream is not leaked.
  val source = Source.fromInputStream(stream)
  val json = try source.mkString finally source.close()

  assertNotNull(json)
  assertFalse(json.isEmpty);

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)

  assertTrue(resultList.nonEmpty)

  val items = resultList.filter(p => p.isInstanceOf[Result])

  mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
  items.foreach(p => println(mapper.writeValueAsString(p)))
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testEmptyTitle() :Unit = {
|
def testEmptyTitle() :Unit = {
|
||||||
val json = Source.fromInputStream(getClass.getResourceAsStream("empty_title.json")).mkString
|
val json = Source.fromInputStream(getClass.getResourceAsStream("empty_title.json")).mkString
|
||||||
|
|
|
@ -0,0 +1,271 @@
|
||||||
|
{
|
||||||
|
"DOI":"10.1016/j.carbpol.2020.115930",
|
||||||
|
"issued":{
|
||||||
|
"date-parts":[
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
4
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"published-print":{
|
||||||
|
"date-parts":[
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
4
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"prefix":"10.1016",
|
||||||
|
"subject":[
|
||||||
|
"Organic Chemistry",
|
||||||
|
"Materials Chemistry",
|
||||||
|
"Polymers and Plastics"
|
||||||
|
],
|
||||||
|
"author":[
|
||||||
|
{
|
||||||
|
"affiliation":[
|
||||||
|
|
||||||
|
],
|
||||||
|
"given":"Lei",
|
||||||
|
"family":"Fang",
|
||||||
|
"sequence":"first"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation":[
|
||||||
|
|
||||||
|
],
|
||||||
|
"given":"Hua",
|
||||||
|
"family":"Lin",
|
||||||
|
"sequence":"additional"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation":[
|
||||||
|
|
||||||
|
],
|
||||||
|
"given":"Zhenfeng",
|
||||||
|
"family":"Wu",
|
||||||
|
"sequence":"additional"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation":[
|
||||||
|
|
||||||
|
],
|
||||||
|
"given":"Zhen",
|
||||||
|
"family":"Wang",
|
||||||
|
"sequence":"additional"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation":[
|
||||||
|
|
||||||
|
],
|
||||||
|
"given":"Xinxin",
|
||||||
|
"family":"Fan",
|
||||||
|
"sequence":"additional"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation":[
|
||||||
|
|
||||||
|
],
|
||||||
|
"given":"Ziting",
|
||||||
|
"family":"Cheng",
|
||||||
|
"sequence":"additional"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation":[
|
||||||
|
|
||||||
|
],
|
||||||
|
"given":"Xiaoya",
|
||||||
|
"family":"Hou",
|
||||||
|
"sequence":"additional"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"authenticated-orcid":false,
|
||||||
|
"given":"Daquan",
|
||||||
|
"family":"Chen",
|
||||||
|
"sequence":"additional",
|
||||||
|
"affiliation":[
|
||||||
|
|
||||||
|
],
|
||||||
|
"ORCID":"http://orcid.org/0000-0002-6796-0204"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"reference-count":41,
|
||||||
|
"ISSN":[
|
||||||
|
"0144-8617"
|
||||||
|
],
|
||||||
|
"assertion":[
|
||||||
|
{
|
||||||
|
"name":"publisher",
|
||||||
|
"value":"Elsevier",
|
||||||
|
"label":"This article is maintained by"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"articletitle",
|
||||||
|
"value":"In vitro/vivo evaluation of novel mitochondrial targeting charge-reversal polysaccharide-based antitumor nanoparticle",
|
||||||
|
"label":"Article Title"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"journaltitle",
|
||||||
|
"value":"Carbohydrate Polymers",
|
||||||
|
"label":"Journal Title"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"articlelink",
|
||||||
|
"value":"https://doi.org/10.1016/j.carbpol.2020.115930",
|
||||||
|
"label":"CrossRef DOI link to publisher maintained version"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"content_type",
|
||||||
|
"value":"article",
|
||||||
|
"label":"Content Type"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"copyright",
|
||||||
|
"value":"\\u00a9 2020 Elsevier Ltd. All rights reserved.",
|
||||||
|
"label":"Copyright"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"member":"78",
|
||||||
|
"source":"Crossref",
|
||||||
|
"score":1.0,
|
||||||
|
"deposited":{
|
||||||
|
"timestamp":1584590965000,
|
||||||
|
"date-time":"2020-03-19T04:09:25Z",
|
||||||
|
"date-parts":[
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
3,
|
||||||
|
19
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"indexed":{
|
||||||
|
"timestamp":1584592912467,
|
||||||
|
"date-time":"2020-03-19T04:41:52Z",
|
||||||
|
"date-parts":[
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
3,
|
||||||
|
19
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"type":"journal-article",
|
||||||
|
"URL":"http://dx.doi.org/10.1016/j.carbpol.2020.115930",
|
||||||
|
"is-referenced-by-count":0,
|
||||||
|
"volume":"234",
|
||||||
|
"issn-type":[
|
||||||
|
{
|
||||||
|
"type":"print",
|
||||||
|
"value":"0144-8617"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"link":[
|
||||||
|
{
|
||||||
|
"URL":"https://api.elsevier.com/content/article/PII:S0144861720301041?httpAccept=text/xml",
|
||||||
|
"intended-application":"text-mining",
|
||||||
|
"content-version":"vor",
|
||||||
|
"content-type":"text/xml"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"URL":"https://api.elsevier.com/content/article/PII:S0144861720301041?httpAccept=text/plain",
|
||||||
|
"intended-application":"text-mining",
|
||||||
|
"content-version":"vor",
|
||||||
|
"content-type":"text/plain"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"update-policy":"http://dx.doi.org/10.1016/elsevier_cm_policy",
|
||||||
|
"references-count":41,
|
||||||
|
"short-container-title":[
|
||||||
|
"Carbohydrate Polymers"
|
||||||
|
],
|
||||||
|
"publisher":"Elsevier BV",
|
||||||
|
"content-domain":{
|
||||||
|
"domain":[
|
||||||
|
"elsevier.com",
|
||||||
|
"sciencedirect.com"
|
||||||
|
],
|
||||||
|
"crossmark-restriction":true
|
||||||
|
},
|
||||||
|
"language":"en",
|
||||||
|
"license":[
|
||||||
|
{
|
||||||
|
"URL":"https://www.elsevier.com/tdm/userlicense/1.0/",
|
||||||
|
"start":{
|
||||||
|
"timestamp":1585699200000,
|
||||||
|
"date-time":"2020-04-01T00:00:00Z",
|
||||||
|
"date-parts":[
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
4,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"content-version":"tdm",
|
||||||
|
"delay-in-days":0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created":{
|
||||||
|
"timestamp":1581759678000,
|
||||||
|
"date-time":"2020-02-15T09:41:18Z",
|
||||||
|
"date-parts":[
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
2,
|
||||||
|
15
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"title":[
|
||||||
|
"In vitro/vivo evaluation of novel mitochondrial targeting charge-reversal polysaccharide-based antitumor nanoparticle"
|
||||||
|
],
|
||||||
|
"alternative-id":[
|
||||||
|
"S0144861720301041"
|
||||||
|
],
|
||||||
|
"container-title":[
|
||||||
|
"Carbohydrate Polymers"
|
||||||
|
],
|
||||||
|
"funder":[
|
||||||
|
{
|
||||||
|
"doi-asserted-by":"publisher",
|
||||||
|
"DOI":"10.13039/501100007129",
|
||||||
|
"name":"Natural Science Foundation of Shandong Province",
|
||||||
|
"award":[
|
||||||
|
"ZR2019ZD24",
|
||||||
|
"ZR2019YQ30"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by":"publisher",
|
||||||
|
"DOI":"10.13039/100010449",
|
||||||
|
"name":"Ministry of Education, Libya",
|
||||||
|
"award":[
|
||||||
|
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by":"publisher",
|
||||||
|
"DOI":"10.13039/501100012249",
|
||||||
|
"name":"Jiangxi University of Traditional Chinese Medicine",
|
||||||
|
"award":[
|
||||||
|
"TCM-0906"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"Taishan Scholar Program",
|
||||||
|
"award":[
|
||||||
|
"qnts20161035"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"Open fund project of Key Laboratory of Modern Preparation of TCM",
|
||||||
|
"award":[
|
||||||
|
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"page":"115930",
|
||||||
|
"article-number":"115930"
|
||||||
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.SortableRelationKey;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class SortableRelationKeyTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void doTesSorting() throws IOException {
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
final String json = IOUtils.toString(this.getClass().getResourceAsStream("relations.json"));
|
||||||
|
final List<Relation> relations = mapper.readValue(json, new TypeReference<List<Relation>>() { });
|
||||||
|
|
||||||
|
|
||||||
|
relations.stream().map(r -> SortableRelationKey.create(r, r.getSource())).sorted()
|
||||||
|
.forEach(
|
||||||
|
|
||||||
|
it -> {
|
||||||
|
try {
|
||||||
|
System.out.println(mapper.writeValueAsString(it));
|
||||||
|
} catch (JsonProcessingException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,90 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"collectedfrom": [],
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:entityregistry",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": 1592688952862,
|
||||||
|
"properties": [],
|
||||||
|
"relClass": "hasAuthorInstitution",
|
||||||
|
"relType": "resultOrganization",
|
||||||
|
"source": "1",
|
||||||
|
"subRelType": "affiliation",
|
||||||
|
"target": "2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": [],
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:entityregistry",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": 1592688952862,
|
||||||
|
"properties": [],
|
||||||
|
"relClass": "isAuthorInstitutionOf",
|
||||||
|
"relType": "resultOrganization",
|
||||||
|
"source": "2",
|
||||||
|
"subRelType": "affiliation",
|
||||||
|
"target": "1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": [],
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:entityregistry",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": 1592688952862,
|
||||||
|
"properties": [],
|
||||||
|
"relClass": "isProducedBy",
|
||||||
|
"relType": "resultProject",
|
||||||
|
"source": "1",
|
||||||
|
"subRelType": "outcome",
|
||||||
|
"target": "2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": [],
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:entityregistry",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": 1592688952862,
|
||||||
|
"properties": [],
|
||||||
|
"relClass": "produces",
|
||||||
|
"relType": "resultProject",
|
||||||
|
"source": "2",
|
||||||
|
"subRelType": "outcome",
|
||||||
|
"target": "1"
|
||||||
|
}
|
||||||
|
]
|
Loading…
Reference in New Issue