forked from D-Net/dnet-hadoop
Merge pull request 'doidoost_dismiss' (#418) from doidoost_dismiss into beta
Reviewed-on: D-Net/dnet-hadoop#418
This commit is contained in:
commit
b554c41cc7
|
@ -1,6 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
public class DoiCleaningRule {
|
public class DoiCleaningRule {
|
||||||
|
|
||||||
public static String clean(final String doi) {
|
public static String clean(final String doi) {
|
||||||
|
@ -11,4 +13,26 @@ public class DoiCleaningRule {
|
||||||
.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
|
.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String normalizeDoi(final String input) {
|
||||||
|
if (input == null)
|
||||||
|
return null;
|
||||||
|
final String replaced = input
|
||||||
|
.replaceAll("\\n|\\r|\\t|\\s", "")
|
||||||
|
.toLowerCase()
|
||||||
|
.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
|
||||||
|
if (StringUtils.isEmpty(replaced))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
if (!replaced.contains("10."))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
final String ret = replaced.substring(replaced.indexOf("10."));
|
||||||
|
|
||||||
|
if (!ret.startsWith(CleaningFunctions.DOI_PREFIX))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
package eu.dnetlib.dhp.application
|
package eu.dnetlib.dhp.application
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.Constants
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile
|
||||||
|
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
|
||||||
/** This is the main Interface SparkApplication
|
/** This is the main Interface SparkApplication
|
||||||
|
@ -70,4 +73,13 @@ abstract class AbstractScalaApplication(
|
||||||
.getOrCreate()
|
.getOrCreate()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Counts the rows obtained by reading `targetPath` as text and passes that count,
  * rendered as a string, to `writeHdfsFile` together with the Hadoop configuration
  * of the current Spark context.
  *
  * @param targetPath     path read through `spark.read.text`
  * @param outputBasePath base path to which `Constants.MDSTORE_SIZE_PATH` is appended
  *                       to build the destination handed to `writeHdfsFile`
  */
def reportTotalSize(targetPath: String, outputBasePath: String): Unit = {
  val lineCount = spark.read.text(targetPath).count()
  val sizeFilePath = outputBasePath + Constants.MDSTORE_SIZE_PATH
  writeHdfsFile(spark.sparkContext.hadoopConfiguration, lineCount.toString, sizeFilePath)
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "m",
|
||||||
|
"paramLongName": "master",
|
||||||
|
"paramDescription": "the master name",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "s",
|
||||||
|
"paramLongName": "sourcePath",
|
||||||
|
"paramDescription": "The base path of Crossref DUMP",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "uw",
|
||||||
|
"paramLongName": "unpaywallPath",
|
||||||
|
"paramDescription": "The base path of unpaywall DUMP",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "mov",
|
||||||
|
"paramLongName": "mdstoreOutputVersion",
|
||||||
|
"paramDescription": "The mdstore Output Version",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "i",
|
||||||
|
"paramLongName": "isLookupUrl",
|
||||||
|
"paramDescription": "the Information System Service LookUp URL",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,916 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": "100007630",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100007630",
|
||||||
|
"name": "College of Engineering and Informatics, National University of Ireland, Galway",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100007731",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100007731",
|
||||||
|
"name": "Endo International",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100008099",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100008099",
|
||||||
|
"name": "Food Safety Authority of Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100008124",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100008124",
|
||||||
|
"name": "Department of Jobs, Enterprise and Innovation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100009098",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100009098",
|
||||||
|
"name": "Department of Foreign Affairs and Trade, Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100009099",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100009099",
|
||||||
|
"name": "Irish Aid",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100009770",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100009770",
|
||||||
|
"name": "National University of Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100009985",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100009985",
|
||||||
|
"name": "Parkinson's Association of Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100010399",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100010399",
|
||||||
|
"name": "European Society of Cataract and Refractive Surgeons",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100010414",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100010414",
|
||||||
|
"name": "Health Research Board",
|
||||||
|
"synonym": [
|
||||||
|
"501100001590"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100010546",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100010546",
|
||||||
|
"name": "Deparment of Children and Youth Affairs, Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100010993",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100010993",
|
||||||
|
"name": "Irish Nephrology Society",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100011096",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100011096",
|
||||||
|
"name": "Jazz Pharmaceuticals",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100011396",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100011396",
|
||||||
|
"name": "Irish College of General Practitioners",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100012734",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100012734",
|
||||||
|
"name": "Department for Culture, Heritage and the Gaeltacht, Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100012754",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100012754",
|
||||||
|
"name": "Horizon Pharma",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100012891",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100012891",
|
||||||
|
"name": "Medical Research Charities Group",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100012919",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100012919",
|
||||||
|
"name": "Epilepsy Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100012920",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100012920",
|
||||||
|
"name": "GLEN",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100012921",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100012921",
|
||||||
|
"name": "Royal College of Surgeons in Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100013029",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100013029",
|
||||||
|
"name": "Iris O'Brien Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100013206",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100013206",
|
||||||
|
"name": "Food Institutional Research Measure",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100013381",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100013381",
|
||||||
|
"name": "Irish Phytochemical Food Network",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100013433",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100013433",
|
||||||
|
"name": "Transport Infrastructure Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100013461",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100013461",
|
||||||
|
"name": "Arts and Disability Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100013548",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100013548",
|
||||||
|
"name": "Filmbase",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100013917",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100013917",
|
||||||
|
"name": "Society for Musicology in Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100014251",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100014251",
|
||||||
|
"name": "Humanities in the European Research Area",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100014364",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100014364",
|
||||||
|
"name": "National Children's Research Centre",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100014384",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100014384",
|
||||||
|
"name": "Amarin Corporation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100014902",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100014902",
|
||||||
|
"name": "Irish Association for Cancer Research",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100015023",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100015023",
|
||||||
|
"name": "Ireland Funds",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100015037",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100015037",
|
||||||
|
"name": "Simon Cumbers Media Fund",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100015319",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100015319",
|
||||||
|
"name": "Sport Ireland Institute",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100015320",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100015320",
|
||||||
|
"name": "Paralympics Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100015442",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100015442",
|
||||||
|
"name": "Global Brain Health Institute",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100015992",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100015992",
|
||||||
|
"name": "St. Luke's Institute of Cancer Research",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100017897",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100017897",
|
||||||
|
"name": "Friedreich\u2019s Ataxia Research Alliance Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100018064",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100018064",
|
||||||
|
"name": "Department of Tourism, Culture, Arts, Gaeltacht, Sport and Media",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100018172",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100018172",
|
||||||
|
"name": "Department of the Environment, Climate and Communications",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100018175",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100018175",
|
||||||
|
"name": "Dairy Processing Technology Centre",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100018270",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100018270",
|
||||||
|
"name": "Health Service Executive",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100018529",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100018529",
|
||||||
|
"name": "Alkermes",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100018542",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100018542",
|
||||||
|
"name": "Irish Endocrine Society",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100018754",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100018754",
|
||||||
|
"name": "An Roinn Sl\u00e1inte",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100018998",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100018998",
|
||||||
|
"name": "Irish Research eLibrary",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100019428",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100019428",
|
||||||
|
"name": "Nabriva Therapeutics",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100019637",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100019637",
|
||||||
|
"name": "Horizon Therapeutics",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100020174",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100020174",
|
||||||
|
"name": "Health Research Charities Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100020202",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100020202",
|
||||||
|
"name": "UCD Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100020233",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100020233",
|
||||||
|
"name": "Ireland Canada University Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100022943",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100022943",
|
||||||
|
"name": "National Cancer Registry Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001581",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001581",
|
||||||
|
"name": "Arts Council of Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001582",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001582",
|
||||||
|
"name": "Centre for Ageing Research and Development in Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001583",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001583",
|
||||||
|
"name": "Cystinosis Foundation Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001584",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001584",
|
||||||
|
"name": "Department of Agriculture, Food and the Marine, Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001586",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001586",
|
||||||
|
"name": "Department of Education and Skills, Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001587",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001587",
|
||||||
|
"name": "Economic and Social Research Institute",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001588",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001588",
|
||||||
|
"name": "Enterprise Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001589",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001589",
|
||||||
|
"name": "Environmental Protection Agency",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001591",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001591",
|
||||||
|
"name": "Heritage Council",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001592",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001592",
|
||||||
|
"name": "Higher Education Authority",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001593",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001593",
|
||||||
|
"name": "Irish Cancer Society",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001594",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001594",
|
||||||
|
"name": "Irish Heart Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001595",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001595",
|
||||||
|
"name": "Irish Hospice Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001598",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001598",
|
||||||
|
"name": "Mental Health Commission",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001600",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001600",
|
||||||
|
"name": "Research and Education Foundation, Sligo General Hospital",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001601",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001601",
|
||||||
|
"name": "Royal Irish Academy",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001603",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001603",
|
||||||
|
"name": "Sustainable Energy Authority of Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001604",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001604",
|
||||||
|
"name": "Teagasc",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001627",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001627",
|
||||||
|
"name": "Marine Institute",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001628",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001628",
|
||||||
|
"name": "Central Remedial Clinic",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001629",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001629",
|
||||||
|
"name": "Royal Dublin Society",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001630",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001630",
|
||||||
|
"name": "Dublin Institute for Advanced Studies",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001631",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001631",
|
||||||
|
"name": "University College Dublin",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001633",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001633",
|
||||||
|
"name": "National University of Ireland, Maynooth",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001634",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001634",
|
||||||
|
"name": "University of Galway",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001635",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001635",
|
||||||
|
"name": "University of Limerick",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001636",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001636",
|
||||||
|
"name": "University College Cork",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001637",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001637",
|
||||||
|
"name": "Trinity College Dublin",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001638",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001638",
|
||||||
|
"name": "Dublin City University",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100002081",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100002081",
|
||||||
|
"name": "Irish Research Council",
|
||||||
|
"synonym": ["501100001596", "501100001597"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100002736",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100002736",
|
||||||
|
"name": "Covidien",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100002755",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100002755",
|
||||||
|
"name": "Brennan and Company",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100002919",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100002919",
|
||||||
|
"name": "Cork Institute of Technology",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100002959",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100002959",
|
||||||
|
"name": "Dublin City Council",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100003036",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100003036",
|
||||||
|
"name": "Perrigo Company Charitable Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100003037",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100003037",
|
||||||
|
"name": "Elan",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100003496",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100003496",
|
||||||
|
"name": "HeyStaks Technologies",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100003553",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100003553",
|
||||||
|
"name": "Gaelic Athletic Association",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100003840",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100003840",
|
||||||
|
"name": "Irish Institute of Clinical Neuroscience",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100003956",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100003956",
|
||||||
|
"name": "Aspect Medical Systems",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100004162",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100004162",
|
||||||
|
"name": "Meath Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100004210",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100004210",
|
||||||
|
"name": "Our Lady's Children's Hospital, Crumlin",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100004321",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100004321",
|
||||||
|
"name": "Shire",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100004981",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100004981",
|
||||||
|
"name": "Athlone Institute of Technology",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100006518",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100006518",
|
||||||
|
"name": "Department of Communications, Energy and Natural Resources, Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100006553",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100006553",
|
||||||
|
"name": "Collaborative Centre for Applied Nanotechnology",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100006759",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100006759",
|
||||||
|
"name": "CLARITY Centre for Sensor Web Technologies",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100009246",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100009246",
|
||||||
|
"name": "Technological University Dublin",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100009269",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100009269",
|
||||||
|
"name": "Programme of Competitive Forestry Research for Development",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100009315",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100009315",
|
||||||
|
"name": "Cystinosis Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100010808",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100010808",
|
||||||
|
"name": "Geological Survey of Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100011030",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100011030",
|
||||||
|
"name": "Alimentary Glycoscience Research Cluster",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100011031",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100011031",
|
||||||
|
"name": "Alimentary Health",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100011103",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100011103",
|
||||||
|
"name": "Rann\u00eds",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100012354",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100012354",
|
||||||
|
"name": "Inland Fisheries Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100014384",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100014384",
|
||||||
|
"name": "X-Bolt Orthopaedics",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100014710",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100014710",
|
||||||
|
"name": "PrecisionBiotics Group",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100014827",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100014827",
|
||||||
|
"name": "Dormant Accounts Fund",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100016041",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100016041",
|
||||||
|
"name": "St Vincents Anaesthesia Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100017501",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100017501",
|
||||||
|
"name": "FotoNation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100018641",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100018641",
|
||||||
|
"name": "Dairy Research Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100018839",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100018839",
|
||||||
|
"name": "Irish Centre for High-End Computing",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100019905",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100019905",
|
||||||
|
"name": "Galway University Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100020036",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100020036",
|
||||||
|
"name": "Dystonia Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100020221",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100020221",
|
||||||
|
"name": "Irish Motor Neurone Disease Association",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100020270",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100020270",
|
||||||
|
"name": "Advanced Materials and Bioengineering Research",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100020403",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100020403",
|
||||||
|
"name": "Irish Composites Centre",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100020425",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100020425",
|
||||||
|
"name": "Irish Thoracic Society",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100021102",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100021102",
|
||||||
|
"name": "Waterford Institute of Technology",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100021110",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100021110",
|
||||||
|
"name": "Irish MPS Society",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100021525",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100021525",
|
||||||
|
"name": "Insight SFI Research Centre for Data Analytics",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100021694",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100021694",
|
||||||
|
"name": "Elan Pharma International",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100021838",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100021838",
|
||||||
|
"name": "Royal College of Physicians of Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100022542",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100022542",
|
||||||
|
"name": "Breakthrough Cancer Research",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100022610",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100022610",
|
||||||
|
"name": "Breast Cancer Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100022728",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100022728",
|
||||||
|
"name": "Munster Technological University",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100022729",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100022729",
|
||||||
|
"name": "Institute of Technology, Tralee",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100023273",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100023273",
|
||||||
|
"name": "HRB Clinical Research Facility Galway",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100023378",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100023378",
|
||||||
|
"name": "Lauritzson Foundation",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100023551",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100023551",
|
||||||
|
"name": "Cystic Fibrosis Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100023970",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100023970",
|
||||||
|
"name": "Tyndall National Institute",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100024094",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100024094",
|
||||||
|
"name": "Raidi\u00f3 Teilif\u00eds \u00c9ireann",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100024242",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100024242",
|
||||||
|
"name": "Synthesis and Solid State Pharmaceutical Centre",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100024313",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100024313",
|
||||||
|
"name": "Irish Rugby Football Union",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100007490",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100007490",
|
||||||
|
"name": "Bausch and Lomb Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100007819",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100007819",
|
||||||
|
"name": "Allergan",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100010547",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100010547",
|
||||||
|
"name": "Irish Youth Justice Service",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100012733",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100012733",
|
||||||
|
"name": "National Parks and Wildlife Service",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100015278",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100015278",
|
||||||
|
"name": "Pfizer Healthcare Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100017144",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100017144",
|
||||||
|
"name": "Shell E and P Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "100022895",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/100022895",
|
||||||
|
"name": "Health Research Institute, University of Limerick",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100001599",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100001599",
|
||||||
|
"name": "National Council for Forest Research and Development",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100006554",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100006554",
|
||||||
|
"name": "IDA Ireland",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100011626",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100011626",
|
||||||
|
"name": "Energy Policy Research Centre, Economic and Social Research Institute",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100014531",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100014531",
|
||||||
|
"name": "Physical Education and Sport Sciences Department, University of Limerick",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100014745",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100014745",
|
||||||
|
"name": "APC Microbiome Institute",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100014826",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100014826",
|
||||||
|
"name": "ADAPT - Centre for Digital Content Technology",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100020570",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100020570",
|
||||||
|
"name": "College of Medicine, Nursing and Health Sciences, National University of Ireland, Galway",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100020871",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100020871",
|
||||||
|
"name": "Bernal Institute, University of Limerick",
|
||||||
|
"synonym": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "501100023852",
|
||||||
|
"uri": "http://dx.doi.org/10.13039/501100023852",
|
||||||
|
"name": "Moore Institute for Research in the Humanities and Social Studies, University of Galway",
|
||||||
|
"synonym": []
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,44 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>spark2</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>spark2ExtraListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 </value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<value>/user/spark/spark2ApplicationHistory</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</configuration>
|
|
@ -0,0 +1,131 @@
|
||||||
|
<workflow-app name="generate_crossref_Datasource" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>sourcePath</name>
|
||||||
|
<description>The base path of Crossref DUMP </description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>unpaywallPath</name>
|
||||||
|
<description>The base path of unpaywall DUMP </description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>isLookupUrl</name>
|
||||||
|
<description>The Information service Lookup URL</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>mdStoreOutputId</name>
|
||||||
|
<description>the identifier of the MDStore where the generated OAF records are stored</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>mdStoreManagerURI</name>
|
||||||
|
<description>the URI of the MDStore manager service</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<start to="StartTransaction"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<!-- Opens a new version of the output MDStore; the captured output is read by the
     following actions through wf:actionData('StartTransaction')['mdStoreVersion'].
     On failure the workflow goes straight to Kill: no version was created, so there is
     nothing to roll back, and this workflow has no 'BeginRead' action whose read lock
     could be released. -->
<action name="StartTransaction">
    <java>
        <configuration>
            <property>
                <name>oozie.launcher.mapreduce.user.classpath.first</name>
                <value>true</value>
            </property>
        </configuration>
        <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
        <arg>--action</arg><arg>NEW_VERSION</arg>
        <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
        <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
        <capture-output/>
    </java>
    <ok to="generateOAF"/>
    <error to="Kill"/>
</action>
|
||||||
|
|
||||||
|
<action name="generateOAF">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Crossref TO OAF</name>
|
||||||
|
<class>eu.dnetlib.dhp.collection.crossref.SparkMapDumpIntoOAF</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.executor.memoryOverhead=2g
|
||||||
|
--conf spark.sql.shuffle.partitions=3000
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
|
<arg>--unpaywallPath</arg><arg>${unpaywallPath}</arg>
|
||||||
|
<arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
<arg>--master</arg><arg>yarn</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="CommitVersion"/>
|
||||||
|
<error to="RollBack"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="CommitVersion">
|
||||||
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
||||||
|
<arg>--action</arg><arg>COMMIT</arg>
|
||||||
|
<arg>--namenode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
||||||
|
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="EndReadRollBack">
|
||||||
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
||||||
|
<arg>--action</arg><arg>READ_UNLOCK</arg>
|
||||||
|
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
||||||
|
<arg>--readMDStoreId</arg><arg>${wf:actionData('BeginRead')['mdStoreReadLockVersion']}</arg>
|
||||||
|
<capture-output/>
|
||||||
|
</java>
|
||||||
|
<ok to="RollBack"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="RollBack">
|
||||||
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
||||||
|
<arg>--action</arg><arg>ROLLBACK</arg>
|
||||||
|
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
||||||
|
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="Kill"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
</workflow-app>
|
|
@ -0,0 +1,21 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "m",
|
||||||
|
"paramLongName": "master",
|
||||||
|
"paramDescription": "the master name",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "mo",
|
||||||
|
"paramLongName": "mdstoreOutputVersion",
|
||||||
|
"paramDescription": "The mdstore output",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ma",
|
||||||
|
"paramLongName": "magBasePath",
|
||||||
|
"paramDescription": "The mag Base path",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
|
@ -0,0 +1,15 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "m",
|
||||||
|
"paramLongName": "master",
|
||||||
|
"paramDescription": "the master name",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "mp",
|
||||||
|
"paramLongName": "magBasePath",
|
||||||
|
"paramDescription": "The base path of MAG DUMP CSV Tables",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
|
@ -0,0 +1,21 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "m",
|
||||||
|
"paramLongName": "master",
|
||||||
|
"paramDescription": "the master name",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "o",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "The output path",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ma",
|
||||||
|
"paramLongName": "magBasePath",
|
||||||
|
"paramDescription": "The mag Base path",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
|
@ -0,0 +1,23 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>spark2</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,160 @@
|
||||||
|
<workflow-app name="generate_MAG_Datasource" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>magBasePath</name>
|
||||||
|
<description>The base path of MAG DUMP CSV Tables</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>mdStoreOutputId</name>
|
||||||
|
<description>the identifier of the MDStore where the generated OAF records are stored</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>mdStoreManagerURI</name>
|
||||||
|
<description>the URI of the MDStore manager service</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>resume_from</name>
|
||||||
|
<value>generateOAF</value>
|
||||||
|
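<description>node to resume from: 'generateTable' regenerates the MAG table first, any other value starts from StartTransaction</description>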
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<start to="resume_from"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
|
||||||
|
<decision name="resume_from">
|
||||||
|
<switch>
|
||||||
|
<case to="generateTable">${wf:conf('resume_from') eq 'generateTable'}</case>
|
||||||
|
<default to="StartTransaction"/> <!-- any other value skips the table generation and opens a new MDStore version -->
|
||||||
|
</switch>
|
||||||
|
</decision>
|
||||||
|
|
||||||
|
|
||||||
|
<action name="generateTable">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Generate MAG Table</name>
|
||||||
|
<class>eu.dnetlib.dhp.collection.mag.SparkCreateMagDenormalizedTable</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.executor.memoryOverhead=2g
|
||||||
|
--conf spark.sql.shuffle.partitions=3000
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--magBasePath</arg><arg>${magBasePath}</arg>
|
||||||
|
<arg>--master</arg><arg>yarn</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="StartTransaction"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<!-- Opens a new version of the output MDStore; the captured output is read by the
     following actions through wf:actionData('StartTransaction')['mdStoreVersion'].
     On failure the workflow goes straight to Kill: no version was created, so there is
     nothing to roll back, and this workflow has no 'BeginRead' action whose read lock
     could be released. -->
<action name="StartTransaction">
    <java>
        <configuration>
            <property>
                <name>oozie.launcher.mapreduce.user.classpath.first</name>
                <value>true</value>
            </property>
        </configuration>
        <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
        <arg>--action</arg><arg>NEW_VERSION</arg>
        <arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
        <arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
        <capture-output/>
    </java>
    <ok to="generateOAF"/>
    <error to="Kill"/>
</action>
|
||||||
|
|
||||||
|
<action name="generateOAF">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>MAG TO OAF</name>
|
||||||
|
<class>eu.dnetlib.dhp.collection.mag.SparkMAGtoOAF</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.executor.memoryOverhead=2g
|
||||||
|
--conf spark.sql.shuffle.partitions=3000
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
||||||
|
<arg>--magBasePath</arg><arg>${magBasePath}</arg>
|
||||||
|
<arg>--master</arg><arg>yarn</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="CommitVersion"/>
|
||||||
|
<error to="RollBack"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="CommitVersion">
|
||||||
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
||||||
|
<arg>--action</arg><arg>COMMIT</arg>
|
||||||
|
<arg>--namenode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
||||||
|
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="EndReadRollBack">
|
||||||
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
||||||
|
<arg>--action</arg><arg>READ_UNLOCK</arg>
|
||||||
|
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
||||||
|
<arg>--readMDStoreId</arg><arg>${wf:actionData('BeginRead')['mdStoreReadLockVersion']}</arg>
|
||||||
|
<capture-output/>
|
||||||
|
</java>
|
||||||
|
<ok to="RollBack"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="RollBack">
|
||||||
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
|
||||||
|
<arg>--action</arg><arg>ROLLBACK</arg>
|
||||||
|
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
||||||
|
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="Kill"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
</workflow-app>
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,147 @@
|
||||||
|
package eu.dnetlib.dhp.collection.crossref
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
|
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||||
|
import eu.dnetlib.dhp.collection.crossref.Crossref2Oaf.{TransformationType, mergeUnpayWall}
|
||||||
|
import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
||||||
|
import org.apache.spark.sql._
|
||||||
|
import org.apache.spark.sql.functions.{col, explode, lower}
|
||||||
|
import org.apache.spark.sql.types._
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
/** Spark application that maps a Crossref dump into OAF records and stores them, as
  * gzip-compressed JSON text, under the data path of an MDStore version.
  *
  * @param propertyPath location of the JSON description of the accepted parameters
  *                     (forwarded to [[AbstractScalaApplication]])
  * @param args         command line arguments
  * @param log          logger used to report the resolved parameters
  */
class SparkMapDumpIntoOAF(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Reads the job parameters, resolves the output location from the MDStore version
    * received as JSON, runs the Crossref transformation and finally reports the size
    * of what has been written.
    */
  override def run(): Unit = {
    val sourcePath = parser.get("sourcePath")
    log.info("sourcePath: {}", sourcePath)
    val unpaywallPath = parser.get("unpaywallPath")
    log.info("unpaywallPath: {}", unpaywallPath)
    val isLookupUrl: String = parser.get("isLookupUrl")
    log.info("isLookupUrl: {}", isLookupUrl)
    val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
    val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)
    require(vocabularies != null)
    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
    log.info(s"mdstoreOutputVersion is '$mdstoreOutputVersion'")

    // The MDStore version arrives serialized as JSON; only its HDFS path is needed here.
    val mapper = new ObjectMapper()
    val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
    log.info(s"outputBasePath is '$outputBasePath'")
    val targetPath = s"$outputBasePath$MDSTORE_DATA_PATH"
    log.info(s"targetPath is '$targetPath'")
    transformCrossref(spark, sourcePath, targetPath, unpaywallPath, vocabularies)
    reportTotalSize(targetPath, outputBasePath)
  }

  /** Loads the open-access Unpaywall records that refer to a DOI present in the Crossref dump.
    *
    * DOIs are lower-cased on both sides before matching. Only Unpaywall rows with
    * `is_oa = true` and a non-null `best_oa_location.url` are kept, and the left-semi
    * join drops the rows whose DOI does not occur in the Crossref dump.
    *
    * @param spark         the active Spark session
    * @param unpaywallPath path of the Unpaywall dump (JSON)
    * @param crossrefPath  path of the Crossref dump (JSON); only its `DOI` field is read
    * @return the filtered Unpaywall records; the dataset is marked as cached, the caller
    *         is in charge of releasing it
    */
  def transformUnpayWall(spark: SparkSession, unpaywallPath: String, crossrefPath: String): Dataset[UnpayWall] = {
    val schema = new StructType()
      .add(StructField("doi", StringType))
      .add(StructField("is_oa", BooleanType))
      .add(
        StructField(
          "best_oa_location",
          new StructType()
            .add("host_type", StringType)
            .add("license", StringType)
            .add("url", StringType)
        )
      )
      .add("oa_status", StringType)

    import spark.implicits._
    val cId = spark.read
      .schema(new StructType().add("DOI", StringType))
      .json(crossrefPath)
      .withColumn("doi", lower(col("DOI")))

    val uw = spark.read
      .schema(schema)
      .json(unpaywallPath)
      .withColumn("doi", lower(col("doi")))
      .where("is_oa = true and best_oa_location.url is not null")

    uw.join(cId, uw("doi") === cId("doi"), "leftsemi").as[UnpayWall].cache()
  }

  /** Converts the Crossref dump and writes everything under `targetPath` in three steps:
    * the relations (overwriting any previous content), the results merged with the
    * matching Unpaywall record (appended), and the affiliation relations built from the
    * author affiliations carrying a ROR identifier (appended).
    *
    * @param spark         the active Spark session
    * @param sourcePath    path of the Crossref dump, one JSON record per line
    * @param targetPath    output path of the gzip-compressed JSON text
    * @param unpaywallPath path of the Unpaywall dump
    * @param vocabularies  vocabularies handed to the Crossref mapping
    */
  def transformCrossref(
    spark: SparkSession,
    sourcePath: String,
    targetPath: String,
    unpaywallPath: String,
    vocabularies: VocabularyGroup
  ): Unit = {
    import spark.implicits._

    val mapper = new ObjectMapper()

    implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
    implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result])

    val dump: Dataset[String] = spark.read.text(sourcePath).as[String]

    // Step 1: relations. This is the only write that overwrites the target.
    dump
      .flatMap(s => Crossref2Oaf.convert(s, vocabularies, TransformationType.OnlyRelation))
      .as[Oaf]
      .map(r => mapper.writeValueAsString(r))
      .write
      .mode(SaveMode.Overwrite)
      .option("compression", "gzip")
      .text(targetPath)

    // Step 2: results, keyed by the value of their first pid so that they can be joined
    // with Unpaywall on the DOI.
    // NOTE(review): assumes every converted result carries at least one pid - confirm
    // against Crossref2Oaf.convert.
    val uw = transformUnpayWall(spark, unpaywallPath, sourcePath)
    val resultCrossref: Dataset[(String, Result)] = dump
      .flatMap(s => Crossref2Oaf.convert(s, vocabularies, TransformationType.OnlyResult))
      .as[Oaf]
      .map(r => r.asInstanceOf[Result])
      .map(r => (r.getPid.get(0).getValue, r))(Encoders.tuple(Encoders.STRING, resultEncoder))

    // The dataset written here contains JSON strings, so it is written as it is: no
    // `.as[Result]` re-typing is applied after the serialization.
    resultCrossref
      .joinWith(uw, resultCrossref("_1").equalTo(uw("doi")), "left")
      .map(k => mergeUnpayWall(k._1._2, k._2))
      .map(r => mapper.writeValueAsString(r))
      .write
      .mode(SaveMode.Append)
      .option("compression", "gzip")
      .text(s"$targetPath")

    // The Unpaywall records are not used by the last step: release the cache.
    uw.unpersist()

    // Generate affiliation relations:
    spark.read
      .json(sourcePath)
      .select(col("DOI"), explode(col("author.affiliation")).alias("affiliations"))
      .select(col("DOI"), explode(col("affiliations.id")).alias("aids"))
      .where("aids is not null")
      .select(col("DOI"), explode(col("aids")).alias("aff"))
      .select(col("DOI"), col("aff.id").alias("id"), col("aff.id-type").alias("idType"))
      .where(col("idType").like("ROR"))
      .flatMap(r => Crossref2Oaf.generateAffliation(r))
      .write
      .mode(SaveMode.Append)
      .option("compression", "gzip")
      .text(s"$targetPath")
  }
}
|
||||||
|
|
||||||
|
object SparkMapDumpIntoOAF {

  /** Command line entry point: creates the application with the Crossref parameter
    * description, initializes it and runs it.
    *
    * @param args command line arguments forwarded to the application
    */
  def main(args: Array[String]): Unit = {
    val parametersPath = "/eu/dnetlib/dhp/collection/crossref/convert_crossref_dump_to_oaf_params.json"
    val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass)
    val application = new SparkMapDumpIntoOAF(parametersPath, args, logger)
    application.initialize().run()
  }

}
|
|
@ -0,0 +1,778 @@
|
||||||
|
package eu.dnetlib.dhp.collection.mag
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
|
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.{OafMapperUtils, PidType}
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, Journal, Organization, Publication, Relation, Result, Dataset => OafDataset}
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
|
import org.apache.spark.sql.types._
|
||||||
|
import org.apache.spark.sql.{Dataset, Row, SparkSession}
|
||||||
|
import org.json4s
|
||||||
|
import org.json4s.DefaultFormats
|
||||||
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
|
/** A MAG paper together with its journal, conference, abstract, author and url
  * information. Every field is optional.
  *
  * NOTE(review): the field names suggest a denormalized view built from the MAG
  * Papers, Journals, ConferenceInstances, PaperAbstractsInvertedIndex,
  * PaperAuthorAffiliations and PaperUrls tables - confirm against the code that
  * produces these records.
  */
case class MAGPaper(
  // paper
  paperId: Option[Long],
  doi: Option[String],
  docType: Option[String],
  paperTitle: Option[String],
  originalTitle: Option[String],
  bookTitle: Option[String],
  year: Option[Int],
  date: Option[String],
  onlineDate: Option[String],
  publisher: Option[String],
  // journal
  journalId: Option[Long],
  journalName: Option[String],
  journalIssn: Option[String],
  journalPublisher: Option[String],
  // conference
  conferenceSeriesId: Option[Long],
  conferenceInstanceId: Option[Long],
  conferenceName: Option[String],
  conferenceLocation: Option[String],
  conferenceStartDate: Option[String],
  conferenceEndDate: Option[String],
  // bibliographic details and counters
  volume: Option[String],
  issue: Option[String],
  firstPage: Option[String],
  lastPage: Option[String],
  referenceCount: Option[Long],
  citationCount: Option[Long],
  estimatedCitation: Option[Long],
  originalVenue: Option[String],
  familyId: Option[Long],
  familyRank: Option[Int],
  docSubTypes: Option[String],
  createdDate: Option[String],
  // abstract, authors and urls
  abstractText: Option[String],
  authors: Option[List[MAGAuthor]],
  urls: Option[List[String]]
)
|
||||||
|
|
||||||
|
/** An author of a MAG paper together with the affiliation declared on that paper.
  * Every field is optional; the capitalized names are part of the public interface
  * and must not be changed.
  *
  * @param AffiliationId        identifier of the affiliation
  * @param AuthorSequenceNumber position of the author in the author list
  * @param AffiliationName      name of the affiliation
  * @param AuthorName           name of the author
  * @param AuthorId             identifier of the author
  * @param GridId               GRID identifier (presumably of the affiliation - confirm)
  */
case class MAGAuthor(
  AffiliationId: Option[Long],
  AuthorSequenceNumber: Option[Int],
  AffiliationName: Option[String],
  AuthorName: Option[String],
  AuthorId: Option[Long],
  GridId: Option[String]
)
|
||||||
|
|
||||||
|
object MagUtility extends Serializable {
|
||||||
|
|
||||||
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
|
private val MAGCollectedFrom = keyValue(ModelConstants.MAG_ID, ModelConstants.MAG_NAME)
|
||||||
|
|
||||||
|
/** Builds the [[DataInfo]] attached to the records generated from MAG: not deleted by
  * inference, not inferred, trust "0.9" and the sysimport action-set provenance action.
  *
  * @param invisible value stored in the `invisible` flag of the returned object
  * @return a new DataInfo instance
  */
private def newMagDataInfo(invisible: Boolean): DataInfo = {
  val di = new DataInfo
  di.setDeletedbyinference(false)
  di.setInferred(false)
  di.setInvisible(invisible)
  di.setTrust("0.9")
  di.setProvenanceaction(
    OafMapperUtils.qualifier(
      ModelConstants.SYSIMPORT_ACTIONSET,
      ModelConstants.SYSIMPORT_ACTIONSET,
      ModelConstants.DNET_PROVENANCE_ACTIONS,
      ModelConstants.DNET_PROVENANCE_ACTIONS
    )
  )
  di
}

/** DataInfo for visible MAG records. */
private val MAGDataInfo: DataInfo = newMagDataInfo(invisible = false)

/** DataInfo for MAG records flagged as invisible. */
private val MAGDataInfoInvisible: DataInfo = newMagDataInfo(invisible = true)
|
||||||
|
|
||||||
|
val datatypedict = Map(
|
||||||
|
"bool" -> BooleanType,
|
||||||
|
"int" -> IntegerType,
|
||||||
|
"uint" -> IntegerType,
|
||||||
|
"long" -> LongType,
|
||||||
|
"ulong" -> LongType,
|
||||||
|
"float" -> FloatType,
|
||||||
|
"string" -> StringType,
|
||||||
|
"DateTime" -> DateType
|
||||||
|
)
|
||||||
|
|
||||||
|
val stream: Map[String, (String, Seq[String])] = Map(
|
||||||
|
"Affiliations" -> Tuple2(
|
||||||
|
"mag/Affiliations.txt",
|
||||||
|
Seq(
|
||||||
|
"AffiliationId:long",
|
||||||
|
"Rank:uint",
|
||||||
|
"NormalizedName:string",
|
||||||
|
"DisplayName:string",
|
||||||
|
"GridId:string",
|
||||||
|
"OfficialPage:string",
|
||||||
|
"WikiPage:string",
|
||||||
|
"PaperCount:long",
|
||||||
|
"PaperFamilyCount:long",
|
||||||
|
"CitationCount:long",
|
||||||
|
"Iso3166Code:string",
|
||||||
|
"Latitude:float?",
|
||||||
|
"Longitude:float?",
|
||||||
|
"CreatedDate:DateTime"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"AuthorExtendedAttributes" -> Tuple2(
|
||||||
|
"mag/AuthorExtendedAttributes.txt",
|
||||||
|
Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")
|
||||||
|
),
|
||||||
|
"Authors" -> Tuple2(
|
||||||
|
"mag/Authors.txt",
|
||||||
|
Seq(
|
||||||
|
"AuthorId:long",
|
||||||
|
"Rank:uint",
|
||||||
|
"NormalizedName:string",
|
||||||
|
"DisplayName:string",
|
||||||
|
"LastKnownAffiliationId:long?",
|
||||||
|
"PaperCount:long",
|
||||||
|
"PaperFamilyCount:long",
|
||||||
|
"CitationCount:long",
|
||||||
|
"CreatedDate:DateTime"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"ConferenceInstances" -> Tuple2(
|
||||||
|
"mag/ConferenceInstances.txt",
|
||||||
|
Seq(
|
||||||
|
"ConferenceInstanceId:long",
|
||||||
|
"NormalizedName:string",
|
||||||
|
"DisplayName:string",
|
||||||
|
"ConferenceSeriesId:long",
|
||||||
|
"Location:string",
|
||||||
|
"OfficialUrl:string",
|
||||||
|
"StartDate:DateTime?",
|
||||||
|
"EndDate:DateTime?",
|
||||||
|
"AbstractRegistrationDate:DateTime?",
|
||||||
|
"SubmissionDeadlineDate:DateTime?",
|
||||||
|
"NotificationDueDate:DateTime?",
|
||||||
|
"FinalVersionDueDate:DateTime?",
|
||||||
|
"PaperCount:long",
|
||||||
|
"PaperFamilyCount:long",
|
||||||
|
"CitationCount:long",
|
||||||
|
"Latitude:float?",
|
||||||
|
"Longitude:float?",
|
||||||
|
"CreatedDate:DateTime"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"ConferenceSeries" -> Tuple2(
|
||||||
|
"mag/ConferenceSeries.txt",
|
||||||
|
Seq(
|
||||||
|
"ConferenceSeriesId:long",
|
||||||
|
"Rank:uint",
|
||||||
|
"NormalizedName:string",
|
||||||
|
"DisplayName:string",
|
||||||
|
"PaperCount:long",
|
||||||
|
"PaperFamilyCount:long",
|
||||||
|
"CitationCount:long",
|
||||||
|
"CreatedDate:DateTime"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"EntityRelatedEntities" -> Tuple2(
|
||||||
|
"advanced/EntityRelatedEntities.txt",
|
||||||
|
Seq(
|
||||||
|
"EntityId:long",
|
||||||
|
"EntityType:string",
|
||||||
|
"RelatedEntityId:long",
|
||||||
|
"RelatedEntityType:string",
|
||||||
|
"RelatedType:int",
|
||||||
|
"Score:float"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"FieldOfStudyChildren" -> Tuple2(
|
||||||
|
"advanced/FieldOfStudyChildren.txt",
|
||||||
|
Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")
|
||||||
|
),
|
||||||
|
"FieldOfStudyExtendedAttributes" -> Tuple2(
|
||||||
|
"advanced/FieldOfStudyExtendedAttributes.txt",
|
||||||
|
Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")
|
||||||
|
),
|
||||||
|
"FieldsOfStudy" -> Tuple2(
|
||||||
|
"advanced/FieldsOfStudy.txt",
|
||||||
|
Seq(
|
||||||
|
"FieldOfStudyId:long",
|
||||||
|
"Rank:uint",
|
||||||
|
"NormalizedName:string",
|
||||||
|
"DisplayName:string",
|
||||||
|
"MainType:string",
|
||||||
|
"Level:int",
|
||||||
|
"PaperCount:long",
|
||||||
|
"PaperFamilyCount:long",
|
||||||
|
"CitationCount:long",
|
||||||
|
"CreatedDate:DateTime"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"Journals" -> Tuple2(
|
||||||
|
"mag/Journals.txt",
|
||||||
|
Seq(
|
||||||
|
"JournalId:long",
|
||||||
|
"Rank:uint",
|
||||||
|
"NormalizedName:string",
|
||||||
|
"DisplayName:string",
|
||||||
|
"Issn:string",
|
||||||
|
"Publisher:string",
|
||||||
|
"Webpage:string",
|
||||||
|
"PaperCount:long",
|
||||||
|
"PaperFamilyCount:long",
|
||||||
|
"CitationCount:long",
|
||||||
|
"CreatedDate:DateTime"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"PaperAbstractsInvertedIndex" -> Tuple2(
|
||||||
|
"nlp/PaperAbstractsInvertedIndex.txt.*",
|
||||||
|
Seq("PaperId:long", "IndexedAbstract:string")
|
||||||
|
),
|
||||||
|
"PaperAuthorAffiliations" -> Tuple2(
|
||||||
|
"mag/PaperAuthorAffiliations.txt",
|
||||||
|
Seq(
|
||||||
|
"PaperId:long",
|
||||||
|
"AuthorId:long",
|
||||||
|
"AffiliationId:long?",
|
||||||
|
"AuthorSequenceNumber:uint",
|
||||||
|
"OriginalAuthor:string",
|
||||||
|
"OriginalAffiliation:string"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"PaperCitationContexts" -> Tuple2(
|
||||||
|
"nlp/PaperCitationContexts.txt",
|
||||||
|
Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")
|
||||||
|
),
|
||||||
|
"PaperExtendedAttributes" -> Tuple2(
|
||||||
|
"mag/PaperExtendedAttributes.txt",
|
||||||
|
Seq("PaperId:long", "AttributeType:int", "AttributeValue:string")
|
||||||
|
),
|
||||||
|
"PaperFieldsOfStudy" -> Tuple2(
|
||||||
|
"advanced/PaperFieldsOfStudy.txt",
|
||||||
|
Seq("PaperId:long", "FieldOfStudyId:long", "Score:float")
|
||||||
|
),
|
||||||
|
"PaperMeSH" -> Tuple2(
|
||||||
|
"advanced/PaperMeSH.txt",
|
||||||
|
Seq(
|
||||||
|
"PaperId:long",
|
||||||
|
"DescriptorUI:string",
|
||||||
|
"DescriptorName:string",
|
||||||
|
"QualifierUI:string",
|
||||||
|
"QualifierName:string",
|
||||||
|
"IsMajorTopic:bool"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"PaperRecommendations" -> Tuple2(
|
||||||
|
"advanced/PaperRecommendations.txt",
|
||||||
|
Seq("PaperId:long", "RecommendedPaperId:long", "Score:float")
|
||||||
|
),
|
||||||
|
"PaperReferences" -> Tuple2(
|
||||||
|
"mag/PaperReferences.txt",
|
||||||
|
Seq("PaperId:long", "PaperReferenceId:long")
|
||||||
|
),
|
||||||
|
"PaperResources" -> Tuple2(
|
||||||
|
"mag/PaperResources.txt",
|
||||||
|
Seq(
|
||||||
|
"PaperId:long",
|
||||||
|
"ResourceType:int",
|
||||||
|
"ResourceUrl:string",
|
||||||
|
"SourceUrl:string",
|
||||||
|
"RelationshipType:int"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"PaperUrls" -> Tuple2(
|
||||||
|
"mag/PaperUrls.txt",
|
||||||
|
Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")
|
||||||
|
),
|
||||||
|
"Papers" -> Tuple2(
|
||||||
|
"mag/Papers.txt",
|
||||||
|
Seq(
|
||||||
|
"PaperId:long",
|
||||||
|
"Rank:uint",
|
||||||
|
"Doi:string",
|
||||||
|
"DocType:string",
|
||||||
|
"PaperTitle:string",
|
||||||
|
"OriginalTitle:string",
|
||||||
|
"BookTitle:string",
|
||||||
|
"Year:int?",
|
||||||
|
"Date:DateTime?",
|
||||||
|
"OnlineDate:DateTime?",
|
||||||
|
"Publisher:string",
|
||||||
|
"JournalId:long?",
|
||||||
|
"ConferenceSeriesId:long?",
|
||||||
|
"ConferenceInstanceId:long?",
|
||||||
|
"Volume:string",
|
||||||
|
"Issue:string",
|
||||||
|
"FirstPage:string",
|
||||||
|
"LastPage:string",
|
||||||
|
"ReferenceCount:long",
|
||||||
|
"CitationCount:long",
|
||||||
|
"EstimatedCitation:long",
|
||||||
|
"OriginalVenue:string",
|
||||||
|
"FamilyId:long?",
|
||||||
|
"FamilyRank:uint?",
|
||||||
|
"DocSubTypes:string",
|
||||||
|
"CreatedDate:DateTime"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
"RelatedFieldOfStudy" -> Tuple2(
|
||||||
|
"advanced/RelatedFieldOfStudy.txt",
|
||||||
|
Seq(
|
||||||
|
"FieldOfStudyId1:long",
|
||||||
|
"Type1:string",
|
||||||
|
"FieldOfStudyId2:long",
|
||||||
|
"Type2:string",
|
||||||
|
"Rank:float"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
/** Builds the Spark schema of a MAG stream from its column descriptors.
  *
  * Each descriptor has the form `name:type`; a trailing `?` on the type marks the
  * column as nullable and is removed before the type is looked up in `datatypedict`.
  *
  * @param streamName key of the stream inside `stream`
  * @return the schema holding one field per descriptor, in declaration order
  */
def getSchema(streamName: String): StructType = {
  val descriptors: Seq[String] = stream(streamName)._2
  descriptors.foldLeft(new StructType()) { (partialSchema, descriptor) =>
    val tokens = descriptor.split(":")
    val declaredType: String = tokens.last
    val isNullable: Boolean = declaredType.endsWith("?")
    val typeName: String = if (isNullable) declaredType.replace("?", "") else declaredType
    partialSchema.add(StructField(tokens.head, datatypedict(typeName), isNullable))
  }
}
|
||||||
|
|
||||||
|
/** Reads one MAG entity as a tab separated, header-less text file.
  *
  * @param spark    the active Spark session
  * @param entity   key of the entity inside `stream`
  * @param basePath folder under which the MAG dump is stored
  * @return the typed rows of the entity, or `null` when the entity is not declared in `stream`
  */
def loadMagEntity(spark: SparkSession, entity: String, basePath: String): Dataset[Row] = {
  if (!stream.contains(entity))
    null
  else {
    val entitySchema = getSchema(entity)
    val relativePath = stream(entity)._1
    val reader = spark.read
      .option("header", "false")
      .option("charset", "UTF8")
      .option("delimiter", "\t")
      .schema(entitySchema)
    reader.csv(s"$basePath/$relativePath")
  }
}
|
||||||
|
|
||||||
|
/** Creates the OAF result skeleton (entity plus a single typed instance) for a MAG document type.
  *
  * - a missing document type yields a `Publication` typed "Other literature type";
  * - "patent" records are kept only when the venue text looks like a patent, a journal
  *   or a conference, otherwise `null` is returned;
  * - "repository" records are flagged with `MAGDataInfoInvisible`;
  * - "dataset" records become an `OafDataset`, every other known type a `Publication`;
  * - an unrecognised document type yields `null` instead of failing with a `MatchError`.
  *
  * @param magType the MAG `DocType` value
  * @param source  the original venue, only inspected for patents
  * @return the initialised result, or `null` when the record must be skipped
  */
def createResultFromType(magType: Option[String], source: Option[String]): Result = {

  // every instance type used here belongs to the dnet:publication_resource vocabulary
  def resourceType(code: String, label: String) =
    qualifier(code, label, ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)

  if (magType == null || magType.orNull == null) {
    val untyped = new Publication
    untyped.setDataInfo(MAGDataInfo)
    val i = new Instance
    i.setInstancetype(resourceType("0038", "Other literature type"))
    untyped.setInstance(List(i).asJava)
    untyped
  } else {
    val currentType: String = magType.get
    var result: Result = null

    val tp = currentType.toLowerCase match {
      case "book" =>
        result = new Publication
        resourceType("0002", "Book")
      case "bookchapter" =>
        result = new Publication
        // the original code was the five digit "00013"; all sibling codes have four digits
        resourceType("0013", "Part of book or chapter of book")
      case "journal" =>
        result = new Publication
        resourceType("0043", "Journal")
      case "patent" =>
        if (source != null && source.orNull != null) {
          val s = source.get.toLowerCase
          if (s.contains("patent") || s.contains("brevet")) {
            result = new Publication
            resourceType("0019", "Patent")
          } else if (s.contains("journal of")) {
            result = new Publication
            resourceType("0043", "Journal")
          } else if (
            s.contains("proceedings") || s.contains("conference") || s.contains("workshop") ||
            s.contains("symposium")
          ) {
            result = new Publication
            resourceType("0001", "Article")
          } else null
        } else null
      case "repository" =>
        result = new Publication()
        result.setDataInfo(MAGDataInfoInvisible)
        resourceType("0038", "Other literature type")
      case "thesis" =>
        result = new Publication
        resourceType("0044", "Thesis")
      case "dataset" =>
        result = new OafDataset
        resourceType("0021", "Dataset")
      case "conference" =>
        result = new Publication
        resourceType("0001", "Article")
      case _ =>
        // unknown document type: leave result null so that the caller skips the record
        null
    }

    if (result != null) {
      // only the "repository" branch sets its own data info
      if (result.getDataInfo == null)
        result.setDataInfo(MAGDataInfo)
      val i = new Instance
      i.setInstancetype(tp)
      i.setInstanceTypeMapping(
        List(instanceTypeMapping(currentType, ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)).asJava
      )
      result.setInstance(List(i).asJava)
    }
    result
  }
}
|
||||||
|
|
||||||
|
/** Converts a denormalized MAG paper into the JSON serialization of an OAF result.
  *
  * Papers without URLs, and papers whose document type is rejected by
  * `createResultFromType`, are skipped by returning `null`.
  *
  * @param paper the denormalized MAG record
  * @return the JSON string of the generated result, or `null` when the paper is skipped
  */
def convertMAGtoOAF(paper: MAGPaper): String = {

  // FILTER all the MAG paper with no URL
  if (paper.urls.orNull == null)
    return null

  val result = createResultFromType(paper.docType, paper.originalVenue)
  if (result == null)
    return null

  // builds a pid of type mag_id
  def magIdPid(value: String) =
    structuredProperty(
      value,
      qualifier(
        PidType.mag_id.toString,
        PidType.mag_id.toString,
        ModelConstants.DNET_PID_TYPES,
        ModelConstants.DNET_PID_TYPES
      ),
      null
    )

  result.setCollectedfrom(List(MAGCollectedFrom).asJava)

  val pidList = List(magIdPid(paper.paperId.get.toString))
  result.setPid(pidList.asJava)
  result.setOriginalId(pidList.map(s => s.getValue).asJava)
  result.setId(s"50|mag_________::${DHPUtils.md5(paper.paperId.get.toString)}")

  val originalTitles = structuredProperty(paper.paperTitle.get, ModelConstants.MAIN_TITLE_QUALIFIER, null)
  result.setTitle(List(originalTitles).asJava)

  // prefer the full date; fall back to the first of January of a plausible year
  if (paper.date.orNull != null) {
    result.setDateofacceptance(field(paper.date.get, null))
  } else if (paper.year.isDefined && paper.year.get > 1700) {
    result.setDateofacceptance(field(s"${paper.year.get}-01-01", null))
  }

  if (paper.onlineDate.orNull != null) {
    result.setRelevantdate(
      List(
        structuredProperty(
          paper.onlineDate.get,
          qualifier(
            "published-online",
            "published-online",
            ModelConstants.DNET_DATACITE_DATE,
            ModelConstants.DNET_DATACITE_DATE
          ),
          null
        )
      ).asJava
    )
  }

  if (paper.publisher.orNull != null) {
    result.setPublisher(field(paper.publisher.get, null))
  }

  if (paper.journalId.isDefined && paper.journalName.isDefined) {
    val j = new Journal
    j.setName(paper.journalName.get)
    j.setSp(paper.firstPage.orNull)
    j.setEp(paper.lastPage.orNull)
    j.setIssnPrinted(paper.journalIssn.orNull)
    j.setVol(paper.volume.orNull)
    j.setIss(paper.issue.orNull)
    j.setConferenceplace(paper.conferenceLocation.orNull)
    result match {
      case publication: Publication => publication.setJournal(j)
      // createResultFromType can also return an OafDataset: there the journal is
      // dropped instead of failing with a MatchError
      case _ =>
    }
  }

  if (paper.abstractText.isDefined)
    result.setDescription(List(field(paper.abstractText.get, null)).asJava)

  val instance = result.getInstance().get(0)
  instance.setPid(pidList.asJava)
  if (paper.doi.orNull != null)
    instance.setAlternateIdentifier(
      List(
        structuredProperty(
          paper.doi.get,
          qualifier(
            PidType.doi.toString,
            PidType.doi.toString,
            ModelConstants.DNET_PID_TYPES,
            ModelConstants.DNET_PID_TYPES
          ),
          null
        )
      ).asJava
    )
  instance.setUrl(paper.urls.get.asJava)
  instance.setHostedby(ModelConstants.UNKNOWN_REPOSITORY)
  instance.setCollectedfrom(MAGCollectedFrom)
  instance.setAccessright(
    accessRight(
      ModelConstants.UNKNOWN,
      ModelConstants.NOT_AVAILABLE,
      ModelConstants.DNET_ACCESS_MODES,
      ModelConstants.DNET_ACCESS_MODES
    )
  )

  if (paper.authors.orNull != null && paper.authors.get.nonEmpty)
    result.setAuthor(
      paper.authors.get
        .filter(a => a.AuthorName.orNull != null)
        .map { a =>
          val author = new Author
          author.setFullname(a.AuthorName.get)
          val authorId = a.AuthorId.get.toString
          // NOTE(review): when a GridId is present the MAG author id is added a second
          // time (the GridId value itself is never stored). Kept as in the original;
          // confirm whether a GRID pid or an affiliation was intended here.
          val authorPid =
            if (a.GridId.orNull != null) List(magIdPid(authorId), magIdPid(authorId))
            else List(magIdPid(authorId))
          author.setPid(authorPid.asJava)
          author
        }
        .asJava
    )

  mapper.writeValueAsString(result)
}
|
||||||
|
|
||||||
|
/** Serializes a row of the MAG Affiliations table as an atomic action wrapping an OAF organization.
  *
  * The organization always carries its MAG identifier as pid, preceded by the GRID
  * identifier when the row has one; the country falls back to
  * `ModelConstants.UNKNOWN_COUNTRY` when no ISO code is available.
  *
  * @param r a row exposing AffiliationId, DisplayName, GridId, Iso3166Code and OfficialPage
  * @return the JSON string of the `AtomicAction[Organization]`
  */
def generateOrganization(r: Row): String = {

  // builds a pid whose qualifier uses the same value for class id and class name
  def pidOf(value: String, pidType: String) =
    structuredProperty(
      value,
      qualifier(pidType, pidType, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES),
      null
    )

  val magAffiliationId = r.getAs[Long]("AffiliationId").toString

  val organization = new Organization
  organization.setId(s"20|mag_________::${DHPUtils.md5(magAffiliationId)}")
  organization.setDataInfo(MAGDataInfo)
  organization.setCollectedfrom(List(MAGCollectedFrom).asJava)
  organization.setLegalname(field(r.getAs[String]("DisplayName"), null))

  val gridId = r.getAs[String]("GridId")
  val magPid = pidOf(magAffiliationId, PidType.mag_id.toString)
  val pids =
    if (gridId == null) List(magPid)
    else List(pidOf(gridId, PidType.GRID.toString), magPid)
  organization.setPid(pids.asJava)

  val countryCode = r.getAs[String]("Iso3166Code")
  organization.setCountry(
    if (countryCode == null) ModelConstants.UNKNOWN_COUNTRY
    else qualifier(countryCode, countryCode, "dnet:countries", "dnet:countries")
  )

  Option(r.getAs[String]("OfficialPage")).foreach(page => organization.setWebsiteurl(field(page, null)))

  val action = new AtomicAction[Organization]()
  action.setClazz(classOf[Organization])
  action.setPayload(organization)
  mapper.writeValueAsString(action)
}
|
||||||
|
|
||||||
|
/** Generates the two directed affiliation relations linking a MAG paper and a MAG affiliation.
  *
  * @param paperAffiliation a row exposing the PaperId and AffiliationId columns
  * @return the relation result -> organization followed by its inverse
  */
def generateAffiliationRelations(paperAffiliation: Row): List[Relation] = {

  val organizationId = s"20|mag_________::${DHPUtils.md5(paperAffiliation.getAs[Long]("AffiliationId").toString)}"
  val resultId = s"50|mag_________::${DHPUtils.md5(paperAffiliation.getAs[Long]("PaperId").toString)}"

  // both directions share everything except the endpoints and the relation class
  def affiliationRelation(from: String, to: String, relClass: String): Relation = {
    val rel = new Relation
    rel.setSource(from)
    rel.setTarget(to)
    rel.setRelType(ModelConstants.RESULT_ORGANIZATION)
    rel.setRelClass(relClass)
    rel.setSubRelType(ModelConstants.AFFILIATION)
    rel.setDataInfo(MAGDataInfo)
    rel.setCollectedfrom(List(MAGCollectedFrom).asJava)
    rel
  }

  List(
    affiliationRelation(resultId, organizationId, ModelConstants.HAS_AUTHOR_INSTITUTION),
    affiliationRelation(organizationId, resultId, ModelConstants.IS_AUTHOR_INSTITUTION_OF)
  )
}
|
||||||
|
|
||||||
|
/** Rebuilds the plain text of an abstract from its MAG inverted index.
  *
  * The input JSON carries `IndexLength` (number of word slots) and `InvertedIndex`
  * (word -> positions). Every word is written at each of its positions, slots that no
  * word claims stay empty, and the slots are joined with a single space.
  *
  * A `null` input and a non positive `IndexLength` both produce the empty string.
  * Positions outside `[0, IndexLength)` are ignored, so one inconsistent record does
  * not abort the job with an `ArrayIndexOutOfBoundsException`.
  *
  * @param json_input the JSON serialization of the inverted index
  * @return the reconstructed abstract, or "" when there is nothing to rebuild
  */
def convertInvertedIndexString(json_input: String): String = {
  implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats

  if (json_input == null)
    ""
  else {
    val json: json4s.JValue = parse(json_input)
    val idl = (json \ "IndexLength").extract[Int]
    if (idl <= 0)
      ""
    else {
      // unclaimed slots keep the empty string
      val res = Array.fill[String](idl)("")
      val iid = (json \ "InvertedIndex").extract[Map[String, List[Int]]]
      iid.foreach { case (word, positions) =>
        positions.foreach { position =>
          if (position >= 0 && position < idl)
            res(position) = word
        }
      }
      res.mkString(" ")
    }
  }
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,199 @@
|
||||||
|
package eu.dnetlib.dhp.collection.mag
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||||
|
import org.apache.spark.sql.functions._
|
||||||
|
import org.apache.spark.sql.types.{StringType, StructField, StructType}
|
||||||
|
import org.apache.spark.sql.{Dataset, Row, SparkSession}
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
class SparkCreateMagDenormalizedTable(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Entry point of the Spark node: reads the `magBasePath` parameter and
    * generates the denormalized MAG table under that path.
    */
  override def run(): Unit = {
    val magBasePath: String = parser.get("magBasePath")
    log.info("found parameters magBasePath: {}", magBasePath)
    generatedDenormalizedMAGTable(spark, magBasePath)
  }

  /** Joins the MAG Papers table with abstracts, authors and affiliations, conference
    * instances, journals and URLs, and stores the result in `$magBasePath/mag_denormalized`.
    *
    * Each intermediate step is cached and materialized with `count()`, after which
    * the previous step is released. Papers without any URL are dropped by the final
    * inner join.
    *
    * @param spark       the active Spark session
    * @param magBasePath folder holding the MAG dump; the output is written below it
    */
  private def generatedDenormalizedMAGTable(
    spark: SparkSession,
    magBasePath: String
  ): Unit = {

    import spark.implicits._

    // Load all the MAG papers, normalizing the DOI to lower case
    val magPapers = MagUtility
      .loadMagEntity(spark, "Papers", magBasePath)
      .withColumn("Doi", lower(col("Doi")))

    magPapers.cache()
    magPapers.count()

    // The abstract is stored as an inverted index: it is converted back to plain text,
    // producing a table (PaperId, Abstract)
    val paperAbstract = MagUtility
      .loadMagEntity(spark, "PaperAbstractsInvertedIndex", magBasePath)
      .map(s => (s.getLong(0), MagUtility.convertInvertedIndexString(s.getString(1))))
      .withColumnRenamed("_1", "PaperId")
      .withColumnRenamed("_2", "Abstract")

    // Step0: papers left joined with their abstract
    val step0 = magPapers
      .join(paperAbstract, magPapers("PaperId") === paperAbstract("PaperId"), "left")
      .select(magPapers("*"), paperAbstract("Abstract"))
      .cache()

    step0.count()
    magPapers.unpersist()

    // Authors, Affiliations and PaperAuthorAffiliations are combined into a table
    // holding, for each paper, the list of its authors with their affiliation
    val authors = MagUtility.loadMagEntity(spark, "Authors", magBasePath)
    val affiliations = MagUtility.loadMagEntity(spark, "Affiliations", magBasePath)
    val paperAuthorAffiliations = MagUtility.loadMagEntity(spark, "PaperAuthorAffiliations", magBasePath)

    val j1 = paperAuthorAffiliations
      .join(authors, paperAuthorAffiliations("AuthorId") === authors("AuthorId"), "inner")
      .select(
        col("PaperId"),
        col("AffiliationId"),
        col("AuthorSequenceNumber"),
        authors("DisplayName").alias("AuthorName"),
        authors("AuthorId")
      )

    val paperAuthorAffiliationNormalized = j1
      .join(affiliations, j1("AffiliationId") === affiliations("AffiliationId"), "left")
      .select(j1("*"), affiliations("DisplayName").alias("AffiliationName"), affiliations("GridId"))
      .groupBy("PaperId")
      .agg(
        collect_list(
          struct("AffiliationId", "AuthorSequenceNumber", "AffiliationName", "AuthorName", "AuthorId", "GridId")
        ).alias("authors")
      )

    // Step1: step0 left joined with the author list
    val step1 = step0
      .join(paperAuthorAffiliationNormalized, step0("PaperId") === paperAuthorAffiliationNormalized("PaperId"), "left")
      .select(step0("*"), paperAuthorAffiliationNormalized("authors"))
      .cache()

    step1.count()
    step0.unpersist()

    val conference = MagUtility
      .loadMagEntity(spark, "ConferenceInstances", magBasePath)
      .select(
        $"ConferenceInstanceId",
        $"DisplayName".as("conferenceName"),
        $"Location".as("conferenceLocation"),
        $"StartDate".as("conferenceStartDate"),
        $"EndDate".as("conferenceEndDate")
      )

    // Step2: step1 left joined with the conference instance
    val step2 = step1
      .join(conference, step1("ConferenceInstanceId") === conference("ConferenceInstanceId"), "left")
      .select(
        step1("*"),
        conference("conferenceName"),
        conference("conferenceLocation"),
        conference("conferenceStartDate"),
        conference("conferenceEndDate")
      )
      .cache()

    step2.count()
    step1.unpersist()

    val journals = MagUtility
      .loadMagEntity(spark, "Journals", magBasePath)
      .select(
        $"JournalId",
        $"DisplayName".as("journalName"),
        $"Issn".as("journalIssn"),
        $"Publisher".as("journalPublisher")
      )

    // Step3: step2 left joined with the journal
    val step3 = step2
      .join(journals, step2("JournalId") === journals("JournalId"), "left")
      .select(
        step2("*"),
        journals("journalName"),
        journals("journalIssn"),
        journals("journalPublisher")
      )
      .cache()

    step3.count()
    // step3 is materialized, so the cached step2 is no longer needed
    step2.unpersist()

    // At most 6 distinct URLs are kept for each paper
    val paper_urls = MagUtility
      .loadMagEntity(spark, "PaperUrls", magBasePath)
      .groupBy("PaperId")
      .agg(slice(collect_set("SourceUrl"), 1, 6).alias("urls"))
      .cache()

    paper_urls.count()

    step3
      .join(paper_urls, step3("PaperId") === paper_urls("PaperId"))
      .select(step3("*"), paper_urls("urls"))
      .select(
        $"PaperId".as("paperId"),
        $"Doi".as("doi"),
        $"DocType".as("docType"),
        $"PaperTitle".as("paperTitle"),
        $"OriginalTitle".as("originalTitle"),
        $"BookTitle".as("bookTitle"),
        $"Year".as("year"),
        $"Date".as("date"),
        $"OnlineDate".as("onlineDate"),
        $"Publisher".as("publisher"),
        $"JournalId".as("journalId"),
        $"ConferenceSeriesId".as("conferenceSeriesId"),
        $"ConferenceInstanceId".as("conferenceInstanceId"),
        $"Volume".as("volume"),
        $"Issue".as("issue"),
        $"FirstPage".as("firstPage"),
        $"LastPage".as("lastPage"),
        $"ReferenceCount".as("referenceCount"),
        $"CitationCount".as("citationCount"),
        $"EstimatedCitation".as("estimatedCitation"),
        $"OriginalVenue".as("originalVenue"),
        $"FamilyId".as("familyId"),
        $"FamilyRank".as("familyRank"),
        $"DocSubTypes".as("docSubTypes"),
        $"CreatedDate".as("createdDate"),
        $"Abstract".as("abstractText"),
        $"authors".as("authors"),
        $"conferenceName".as("conferenceName"),
        $"conferenceLocation".as("conferenceLocation"),
        $"conferenceStartDate".as("conferenceStartDate"),
        $"conferenceEndDate".as("conferenceEndDate"),
        $"journalName".as("journalName"),
        $"journalIssn".as("journalIssn"),
        $"journalPublisher".as("journalPublisher"),
        $"urls"
      )
      .write
      .mode("OverWrite")
      .save(s"$magBasePath/mag_denormalized")

    // the output is written: release the remaining cached datasets
    step3.unpersist()
    paper_urls.unpersist()
  }
}
|
||||||
|
|
||||||
|
object SparkCreateMagDenormalizedTable {

  val log: Logger = LoggerFactory.getLogger(SparkCreateMagDenormalizedTable.getClass)

  /** Command line entry point: builds the application from its parameter
    * descriptor, initializes it and runs it.
    */
  def main(args: Array[String]): Unit = {
    val parametersDescriptor = "/eu/dnetlib/dhp/collection/mag/create_MAG_denormalized_table_properties.json"
    val application = new SparkCreateMagDenormalizedTable(parametersDescriptor, args, log)
    application.initialize().run()
  }
}
|
|
@ -0,0 +1,83 @@
|
||||||
|
package eu.dnetlib.dhp.collection.mag
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
|
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||||
|
import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH
|
||||||
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation
|
||||||
|
import org.apache.spark.sql.functions.col
|
||||||
|
import org.apache.spark.sql.types.{ArrayType, StringType, StructField, StructType}
|
||||||
|
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
class SparkMAGtoOAF(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Entry point of the Spark node: resolves the output mdstore from the
    * `mdstoreOutputVersion` parameter, converts the denormalized MAG table into OAF
    * records, appends the affiliation relations and finally reports the total size.
    */
  override def run(): Unit = {
    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
    log.info(s"mdstoreOutputVersion is '$mdstoreOutputVersion'")

    val outputBasePath =
      new ObjectMapper().readValue(mdstoreOutputVersion, classOf[MDStoreVersion]).getHdfsPath
    log.info(s"outputBasePath is '$outputBasePath'")

    val mdstorePath = s"$outputBasePath$MDSTORE_DATA_PATH"
    val magBasePath: String = parser.get("magBasePath")
    log.info("found parameters magBasePath: {}", magBasePath)

    convertMAG(spark, magBasePath, mdstorePath)
    generateAffiliations(spark, magBasePath, mdstorePath)
    reportTotalSize(mdstorePath, outputBasePath)
  }

  /** Converts every record of `$magBasePath/mag_denormalized` into its OAF JSON
    * serialization and overwrites `mdStorePath` with the gzip compressed text;
    * records for which the conversion returns `null` are discarded.
    */
  def convertMAG(spark: SparkSession, magBasePath: String, mdStorePath: String): Unit = {
    import spark.implicits._

    val denormalizedPapers = spark.read
      .load(s"$magBasePath/mag_denormalized")
      .as[MAGPaper]

    val oafRecords = denormalizedPapers
      .map(paper => MagUtility.convertMAGtoOAF(paper))
      .filter(json => json != null)

    oafRecords.write
      .option("compression", "gzip")
      .mode(SaveMode.Overwrite)
      .text(mdStorePath)
  }

  /** Appends to `mdStorePath` the affiliation relations of the papers that are
    * already stored there: the paper identifiers are read back from the `originalId`
    * field of the records found under `mdStorePath`.
    */
  def generateAffiliations(spark: SparkSession, magBasePath: String, mdStorePath: String): Unit = {

    implicit val relEncoder: Encoder[Relation] = Encoders.bean(classOf[Relation])

    val identifierSchema = StructType(
      Seq(
        StructField("id", StringType),
        StructField("originalId", ArrayType(StringType))
      )
    )

    val generatedMag = spark.read
      .schema(identifierSchema)
      .json(mdStorePath)
      .selectExpr("explode(originalId) as PaperId")
      .distinct()

    val paperAuthorAffiliations = MagUtility
      .loadMagEntity(spark, "PaperAuthorAffiliations", magBasePath)
      .where(col("AffiliationId").isNotNull)
      .select("PaperId", "AffiliationId")
      .distinct()

    // left semi join: keep only the affiliation links of the generated papers
    val linksOfGeneratedPapers = paperAuthorAffiliations
      .join(generatedMag, paperAuthorAffiliations("PaperId") === generatedMag("PaperId"), "leftsemi")

    linksOfGeneratedPapers
      .flatMap(link => MagUtility.generateAffiliationRelations(link))
      .write
      .option("compression", "gzip")
      .mode(SaveMode.Append)
      .json(mdStorePath)
  }
}
|
||||||
|
|
||||||
|
object SparkMAGtoOAF {

  val log: Logger = LoggerFactory.getLogger(SparkMAGtoOAF.getClass)

  /** Command line entry point: builds the application from its parameter
    * descriptor, initializes it and runs it.
    */
  def main(args: Array[String]): Unit = {
    val parametersDescriptor = "/eu/dnetlib/dhp/collection/mag/convert_MAG_to_OAF_properties.json"
    val application = new SparkMAGtoOAF(parametersDescriptor, args, log)
    application.initialize().run()
  }
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
package eu.dnetlib.dhp.collection.mag
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||||
|
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization
|
||||||
|
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
class SparkMagOrganizationAS(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Entry point of the Spark node: reads the `magBasePath` and `outputPath`
    * parameters and generates the action set of the MAG organizations.
    */
  override def run(): Unit = {
    val magBasePath: String = parser.get("magBasePath")
    log.info(s"magBasePath is $magBasePath")
    val outputPath: String = parser.get("outputPath")
    log.info(s"outputPath is $outputPath")
    generateAS(spark, magBasePath, outputPath)
  }

  /** Serializes every MAG affiliation through `MagUtility.generateOrganization` and
    * overwrites `outputPath` with the gzip compressed text.
    */
  def generateAS(spark: SparkSession, magBasePath: String, outputPath: String): Unit = {
    import spark.implicits._

    val affiliations = MagUtility.loadMagEntity(spark, "Affiliations", magBasePath)
    val serializedActions = affiliations.map(row => MagUtility.generateOrganization(row))

    serializedActions.write
      .mode(SaveMode.Overwrite)
      .option("compression", "gzip")
      .text(outputPath)
  }
}
|
||||||
|
|
||||||
|
object SparkMagOrganizationAS {

  val log: Logger = LoggerFactory.getLogger(SparkMagOrganizationAS.getClass)

  /** Command line entry point: builds the application from its parameter
    * descriptor, initializes it and runs it.
    */
  def main(args: Array[String]): Unit = {
    val parametersDescriptor = "/eu/dnetlib/dhp/collection/mag/create_organization_AS.json"
    val application = new SparkMagOrganizationAS(parametersDescriptor, args, log)
    application.initialize().run()
  }
}
|
|
@ -46,20 +46,6 @@ class GenerateDataciteDatasetSpark(propertyPath: String, args: Array[String], lo
|
||||||
reportTotalSize(targetPath, outputBasePath)
|
reportTotalSize(targetPath, outputBasePath)
|
||||||
}
|
}
|
||||||
|
|
||||||
/** For working with MDStore we need to store in a file on hdfs the size of
|
|
||||||
* the current dataset
|
|
||||||
* @param targetPath
|
|
||||||
* @param outputBasePath
|
|
||||||
*/
|
|
||||||
def reportTotalSize(targetPath: String, outputBasePath: String): Unit = {
|
|
||||||
val total_items = spark.read.text(targetPath).count()
|
|
||||||
writeHdfsFile(
|
|
||||||
spark.sparkContext.hadoopConfiguration,
|
|
||||||
s"$total_items",
|
|
||||||
outputBasePath + MDSTORE_SIZE_PATH
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Generate the transformed and cleaned OAF Dataset from the native one
|
/** Generate the transformed and cleaned OAF Dataset from the native one
|
||||||
*
|
*
|
||||||
* @param sourcePath sourcePath of the native Dataset in format JSON/Datacite
|
* @param sourcePath sourcePath of the native Dataset in format JSON/Datacite
|
||||||
|
|
|
@ -0,0 +1,849 @@
|
||||||
|
{
|
||||||
|
"URL": "http://dx.doi.org/10.1016/j.joca.2019.11.002",
|
||||||
|
"resource": {
|
||||||
|
"primary": {
|
||||||
|
"URL": "https://linkinghub.elsevier.com/retrieve/pii/S106345841931266X"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"member": "78",
|
||||||
|
"score": 0.0,
|
||||||
|
"created": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2019,
|
||||||
|
11,
|
||||||
|
14
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"date-time": "2019-11-14T18:08:38Z",
|
||||||
|
"timestamp": 1573754918000
|
||||||
|
},
|
||||||
|
"update-policy": "http://dx.doi.org/10.1016/elsevier_cm_policy",
|
||||||
|
"license": [
|
||||||
|
{
|
||||||
|
"start": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
5,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"date-time": "2020-05-01T00:00:00Z",
|
||||||
|
"timestamp": 1588291200000
|
||||||
|
},
|
||||||
|
"content-version": "tdm",
|
||||||
|
"delay-in-days": 0,
|
||||||
|
"URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"start": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2021,
|
||||||
|
5,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"date-time": "2021-05-01T00:00:00Z",
|
||||||
|
"timestamp": 1619827200000
|
||||||
|
},
|
||||||
|
"content-version": "vor",
|
||||||
|
"delay-in-days": 365,
|
||||||
|
"URL": "http://www.elsevier.com/open-access/userlicense/1.0/"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"ISSN": [
|
||||||
|
"1063-4584"
|
||||||
|
],
|
||||||
|
"container-title": [
|
||||||
|
"Osteoarthritis and Cartilage"
|
||||||
|
],
|
||||||
|
"issued": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
5
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"issue": "5",
|
||||||
|
"prefix": "10.1016",
|
||||||
|
"reference-count": 50,
|
||||||
|
"indexed": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2024,
|
||||||
|
2,
|
||||||
|
27
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"date-time": "2024-02-27T00:38:44Z",
|
||||||
|
"timestamp": 1708994324729
|
||||||
|
},
|
||||||
|
"author": [
|
||||||
|
{
|
||||||
|
"given": "N.",
|
||||||
|
"family": "Sharma",
|
||||||
|
"sequence": "first",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "P.",
|
||||||
|
"family": "Drobinski",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "A.",
|
||||||
|
"family": "Kayed",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "Z.",
|
||||||
|
"family": "Chen",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "C.F.",
|
||||||
|
"family": "Kjelgaard-Petersen",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "T.",
|
||||||
|
"family": "Gantzel",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "M.A.",
|
||||||
|
"family": "Karsdal",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "M.",
|
||||||
|
"family": "Michaelis",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "C.",
|
||||||
|
"family": "Ladel",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "A.C.",
|
||||||
|
"family": "Bay-Jensen",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "S.",
|
||||||
|
"family": "Lindemann",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "C.S.",
|
||||||
|
"family": "Thudium",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"DOI": "10.1016/j.joca.2019.11.002",
|
||||||
|
"is-referenced-by-count": 27,
|
||||||
|
"funder": [
|
||||||
|
{
|
||||||
|
"DOI": "10.13039/501100001732",
|
||||||
|
"name": "Danmarks Grundforskningsfond",
|
||||||
|
"doi-asserted-by": "publisher"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"published": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
5
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"published-print": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
5
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"alternative-id": [
|
||||||
|
"S106345841931266X"
|
||||||
|
],
|
||||||
|
"subject": [
|
||||||
|
"Orthopedics and Sports Medicine",
|
||||||
|
"Biomedical Engineering",
|
||||||
|
"Rheumatology"
|
||||||
|
],
|
||||||
|
"content-domain": {
|
||||||
|
"domain": [
|
||||||
|
"clinicalkey.fr",
|
||||||
|
"clinicalkey.jp",
|
||||||
|
"clinicalkey.es",
|
||||||
|
"clinicalkey.com.au",
|
||||||
|
"oarsijournal.com",
|
||||||
|
"clinicalkey.com",
|
||||||
|
"elsevier.com",
|
||||||
|
"sciencedirect.com"
|
||||||
|
],
|
||||||
|
"crossmark-restriction": true
|
||||||
|
},
|
||||||
|
"reference": [
|
||||||
|
{
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib1",
|
||||||
|
"series-title": "Priority Medicines for Europe and the World. 2013 Update",
|
||||||
|
"author": "Kaplan",
|
||||||
|
"year": "2013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"issue": "August 2016",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib2",
|
||||||
|
"article-title": "Osteoarthritis: toward a comprehensive understanding of pathological mechanism",
|
||||||
|
"volume": "5",
|
||||||
|
"author": "Chen",
|
||||||
|
"year": "2017",
|
||||||
|
"journal-title": "Bone Res"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "77",
|
||||||
|
"issue": "2",
|
||||||
|
"author": "Sokolove",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib3",
|
||||||
|
"DOI": "10.1177/1759720X12467868",
|
||||||
|
"article-title": "Role of inflammation in the pathogenesis of osteoarthritis: latest findings and interpretations",
|
||||||
|
"year": "2013",
|
||||||
|
"volume": "5",
|
||||||
|
"journal-title": "Ther Adv Musculoskelet Dis"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"issue": "5",
|
||||||
|
"author": "Alberton",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib4",
|
||||||
|
"DOI": "10.3390/ijms20051008",
|
||||||
|
"article-title": "Aggrecan hypomorphism compromises articular cartilage biomechanical properties and is associated with increased incidence of spontaneous osteoarthritis",
|
||||||
|
"year": "2019",
|
||||||
|
"volume": "20",
|
||||||
|
"journal-title": "Int J Mol Sci"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"issue": "6",
|
||||||
|
"author": "Miller",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib5",
|
||||||
|
"DOI": "10.1172/jci.insight.95704",
|
||||||
|
"article-title": "An aggrecan fragment drives osteoarthritis pain through Toll-like receptor 2",
|
||||||
|
"year": "2018",
|
||||||
|
"volume": "3",
|
||||||
|
"journal-title": "JCI Insight"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "1240",
|
||||||
|
"issue": "5",
|
||||||
|
"author": "Lees",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib6",
|
||||||
|
"DOI": "10.1002/art.39063",
|
||||||
|
"article-title": "Bioactivity in an aggrecan 32-mer fragment is mediated via toll-like receptor 2",
|
||||||
|
"year": "2015",
|
||||||
|
"volume": "67",
|
||||||
|
"journal-title": "Arthritis Rheum"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib7",
|
||||||
|
"article-title": "Increased function of pronociceptive TRPV1 at the level of the joint in a rat model of osteoarthritis pain",
|
||||||
|
"author": "Kelly",
|
||||||
|
"year": "2013",
|
||||||
|
"journal-title": "Ann Rheum Dis"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "580",
|
||||||
|
"issue": "10",
|
||||||
|
"author": "Robinson",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib8",
|
||||||
|
"DOI": "10.1038/nrrheum.2016.136",
|
||||||
|
"article-title": "Low-grade inflammation as a key mediator of the pathogenesis of osteoarthritis",
|
||||||
|
"year": "2016",
|
||||||
|
"volume": "12",
|
||||||
|
"journal-title": "Nat Rev Rheumatol"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "625",
|
||||||
|
"issue": "11",
|
||||||
|
"author": "Sellam",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib9",
|
||||||
|
"DOI": "10.1038/nrrheum.2010.159",
|
||||||
|
"article-title": "The role of synovitis in pathophysiology and clinical symptoms of osteoarthritis",
|
||||||
|
"year": "2010",
|
||||||
|
"volume": "6",
|
||||||
|
"journal-title": "Nat Rev Rheumatol"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "249",
|
||||||
|
"issue": "2",
|
||||||
|
"author": "Scanzello",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib10",
|
||||||
|
"DOI": "10.1016/j.bone.2012.02.012",
|
||||||
|
"article-title": "The role of synovitis in osteoarthritis pathogenesis",
|
||||||
|
"year": "2012",
|
||||||
|
"volume": "51",
|
||||||
|
"journal-title": "Bone"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "33",
|
||||||
|
"issue": "1",
|
||||||
|
"author": "Kapoor",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib11",
|
||||||
|
"DOI": "10.1038/nrrheum.2010.196",
|
||||||
|
"article-title": "Role of proinflammatory cytokines in the pathophysiology of osteoarthritis",
|
||||||
|
"year": "2011",
|
||||||
|
"volume": "7",
|
||||||
|
"journal-title": "Nat Rev Rheumatol"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"issue": "4",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib12",
|
||||||
|
"first-page": "1",
|
||||||
|
"volume": "18",
|
||||||
|
"author": "Roh",
|
||||||
|
"year": "2018",
|
||||||
|
"journal-title": "Origin and List of Damps"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib13",
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"article-title": "Overexpression of toll-like receptors 3 and 4 in synovial tissue from patients with early rheumatoid arthritis: toll-like receptor expression in early and longstanding arthritis",
|
||||||
|
"author": "Ospelt",
|
||||||
|
"year": "2008",
|
||||||
|
"journal-title": "Arthritis Rheum",
|
||||||
|
"DOI": "10.1002/art.24140"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "338",
|
||||||
|
"issue": "2",
|
||||||
|
"author": "Roelofs",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib14",
|
||||||
|
"DOI": "10.1002/art.23217",
|
||||||
|
"article-title": "The orchestra of toll-like receptors and their potential role in frequently occurring rheumatic conditions",
|
||||||
|
"year": "2008",
|
||||||
|
"volume": "58",
|
||||||
|
"journal-title": "Arthritis Rheum"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "657",
|
||||||
|
"issue": "3",
|
||||||
|
"author": "Gondokaryono",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib15",
|
||||||
|
"DOI": "10.1189/jlb.1206730",
|
||||||
|
"article-title": "The extra domain A of fibronectin stimulates murine mast cells via Toll-like receptor 4",
|
||||||
|
"year": "2007",
|
||||||
|
"volume": "82",
|
||||||
|
"journal-title": "J Leukoc Biol"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"issue": "7",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib16",
|
||||||
|
"first-page": "2004",
|
||||||
|
"article-title": "Chondrocyte innate immune myeloid differentiation factor 88-dependent signaling drives procatabolic effects of the endogenous toll-like receptor 2/toll-like receptor 4 ligands low molecular weight hyaluronan and high mobility group box chromosomal protein",
|
||||||
|
"volume": "62",
|
||||||
|
"author": "Liu-Bryan",
|
||||||
|
"year": "2010",
|
||||||
|
"journal-title": "Arthritis Rheum"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "774",
|
||||||
|
"issue": "7",
|
||||||
|
"author": "Midwood",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib17",
|
||||||
|
"DOI": "10.1038/nm.1987",
|
||||||
|
"article-title": "Tenascin-C is an endogenous activator of Toll-like receptor 4 that is essential for maintaining inflammation in arthritic joint disease",
|
||||||
|
"year": "2009",
|
||||||
|
"volume": "15",
|
||||||
|
"journal-title": "Nat Med"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "16",
|
||||||
|
"author": "Berenbaum",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib18",
|
||||||
|
"DOI": "10.1016/j.joca.2012.11.012",
|
||||||
|
"article-title": "Osteoarthritis as an inflammatory disease (osteoarthritis is not osteoarthrosis!)",
|
||||||
|
"year": "2013",
|
||||||
|
"volume": "21",
|
||||||
|
"journal-title": "Osteoarthr Cartil"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "259",
|
||||||
|
"issue": "4",
|
||||||
|
"author": "Wang",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib19",
|
||||||
|
"DOI": "10.1186/1756-0500-2-259",
|
||||||
|
"article-title": "Suppression of MMP activity in bovine cartilage explants cultures has little if any effect on the release of aggrecanase-derived aggrecan fragments",
|
||||||
|
"year": "2009",
|
||||||
|
"volume": "2",
|
||||||
|
"journal-title": "BMC Res Notes"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "423",
|
||||||
|
"issue": "5–6",
|
||||||
|
"author": "Bay-Jensen",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib20",
|
||||||
|
"DOI": "10.1016/j.clinbiochem.2011.01.001",
|
||||||
|
"article-title": "Enzyme-linked immunosorbent assay (ELISAs) for metalloproteinase derived type II collagen neoepitope, CIIM-Increased serum CIIM in subjects with severe radiographic osteoarthritis",
|
||||||
|
"year": "2011",
|
||||||
|
"volume": "44",
|
||||||
|
"journal-title": "Clin Biochem"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "18789",
|
||||||
|
"issue": "10",
|
||||||
|
"author": "Gudmann",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib21",
|
||||||
|
"DOI": "10.3390/ijms151018789",
|
||||||
|
"article-title": "Cartilage turnover reflected by metabolic processing of type II collagen: a novel marker of anabolic function in chondrocytes",
|
||||||
|
"year": "2014",
|
||||||
|
"volume": "15",
|
||||||
|
"journal-title": "Int J Mol Sci"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "899",
|
||||||
|
"issue": "10–11",
|
||||||
|
"author": "Barascuk",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib22",
|
||||||
|
"DOI": "10.1016/j.clinbiochem.2010.03.012",
|
||||||
|
"article-title": "A novel assay for extracellular matrix remodeling associated with liver fibrosis: an enzyme-linked immunosorbent assay (ELISA) for a MMP-9 proteolytically revealed neo-epitope of type III collagen",
|
||||||
|
"year": "2010",
|
||||||
|
"volume": "43",
|
||||||
|
"journal-title": "Clin Biochem"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "93",
|
||||||
|
"author": "Sun",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib23",
|
||||||
|
"DOI": "10.1186/1471-2474-15-93",
|
||||||
|
"article-title": "The active form of MMP-3 is a marker of synovial inflammation and cartilage turnover in inflammatory joint diseases",
|
||||||
|
"year": "2014",
|
||||||
|
"volume": "15",
|
||||||
|
"journal-title": "BMC Muscoskelet Disord"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "103",
|
||||||
|
"issue": "1",
|
||||||
|
"author": "Chandrasekhar",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib24",
|
||||||
|
"DOI": "10.1016/0003-2697(87)90658-0",
|
||||||
|
"article-title": "Microdetermination of proteoglycans and glycosaminoglycans in the presence of guanidine hydrochloride",
|
||||||
|
"year": "1987",
|
||||||
|
"volume": "161",
|
||||||
|
"journal-title": "Anal Biochem"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "8722",
|
||||||
|
"issue": "22",
|
||||||
|
"author": "Hankins",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib25",
|
||||||
|
"DOI": "10.1073/pnas.1201313109",
|
||||||
|
"article-title": "Amino acid addition to Vibrio cholerae LPS establishes a link between surface remodeling in Gram-positive and Gram-negative bacteria",
|
||||||
|
"year": "2012",
|
||||||
|
"volume": "109",
|
||||||
|
"journal-title": "Proc Natl Acad Sci"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"issue": "c",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib26",
|
||||||
|
"first-page": "1",
|
||||||
|
"volume": "2",
|
||||||
|
"author": "Bailie",
|
||||||
|
"year": "2010",
|
||||||
|
"journal-title": "Supporting Information"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "1145",
|
||||||
|
"issue": "9",
|
||||||
|
"author": "Neogi",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib27",
|
||||||
|
"DOI": "10.1016/j.joca.2013.03.018",
|
||||||
|
"article-title": "The epidemiology and impact of pain in osteoarthritis",
|
||||||
|
"year": "2013",
|
||||||
|
"volume": "21",
|
||||||
|
"journal-title": "Osteoarthr Cartil"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "623",
|
||||||
|
"issue": "3",
|
||||||
|
"author": "Hunter",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib28",
|
||||||
|
"DOI": "10.1016/j.rdc.2008.05.004",
|
||||||
|
"article-title": "The symptoms of osteoarthritis and the genesis of pain",
|
||||||
|
"year": "2008",
|
||||||
|
"volume": "34",
|
||||||
|
"journal-title": "Rheum Dis Clin N Am"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "1326",
|
||||||
|
"issue": "11",
|
||||||
|
"author": "Xu",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib29",
|
||||||
|
"DOI": "10.1038/nm.3978",
|
||||||
|
"article-title": "Inhibition of mechanical allodynia in neuropathic pain by TLR5-mediated A-fiber blockade",
|
||||||
|
"year": "2015",
|
||||||
|
"volume": "21",
|
||||||
|
"journal-title": "Nat Med"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "145",
|
||||||
|
"issue": "October 2017",
|
||||||
|
"author": "Lacagnina",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib30",
|
||||||
|
"DOI": "10.1016/j.pharmthera.2017.10.006",
|
||||||
|
"article-title": "Toll-like receptors and their role in persistent pain",
|
||||||
|
"year": "2018",
|
||||||
|
"volume": "184",
|
||||||
|
"journal-title": "Pharmacol Ther"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib31",
|
||||||
|
"first-page": "2016",
|
||||||
|
"article-title": "Blockade of toll-like receptors (TLR2, TLR4) attenuates pain and potentiates buprenorphine analgesia in a rat neuropathic pain model",
|
||||||
|
"author": "Jurga",
|
||||||
|
"year": "2016",
|
||||||
|
"journal-title": "Neural Plast"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "357",
|
||||||
|
"issue": "5",
|
||||||
|
"author": "Huang",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib32",
|
||||||
|
"DOI": "10.1007/s11926-009-0051-z",
|
||||||
|
"article-title": "The role of toll-like receptors in rheumatoid arthritis",
|
||||||
|
"year": "2009",
|
||||||
|
"volume": "11",
|
||||||
|
"journal-title": "Curr Rheumatol Rep"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"issue": "3",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib33",
|
||||||
|
"first-page": "1",
|
||||||
|
"article-title": "TLR4 signalling in osteoarthritis-finding targets for candidate DMOADs",
|
||||||
|
"volume": "11",
|
||||||
|
"author": "Gómez",
|
||||||
|
"year": "2014",
|
||||||
|
"journal-title": "Nat Rev Rheumatol"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"issue": "5 Pt 1",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib34",
|
||||||
|
"first-page": "1432",
|
||||||
|
"article-title": "Procollagen types I and III aminoterminal propeptide levels during acute respiratory distress syndrome and in response to methylprednisolone treatment.[Erratum appears in Am J Respir Crit Care Med. 2013 Dec 15;188(12):1477]",
|
||||||
|
"volume": "158",
|
||||||
|
"author": "Meduri",
|
||||||
|
"year": "1998",
|
||||||
|
"journal-title": "Am J Respir Crit Care Med"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "5",
|
||||||
|
"issue": "1",
|
||||||
|
"author": "Scheja",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib35",
|
||||||
|
"DOI": "10.3109/03009749209095054",
|
||||||
|
"article-title": "Serum levels of aminoterminal type III procollagen peptide and hyaluronan predict mortality in systemic sclerosis",
|
||||||
|
"year": "1992",
|
||||||
|
"volume": "21",
|
||||||
|
"journal-title": "Scand J Rheumatol"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "547",
|
||||||
|
"issue": "8",
|
||||||
|
"author": "Kjelgaard-Petersen",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib36",
|
||||||
|
"DOI": "10.3109/1354750X.2015.1105497",
|
||||||
|
"article-title": "Synovitis biomarkers: ex vivo characterization of three biomarkers for identification of inflammatory osteoarthritis",
|
||||||
|
"year": "2015",
|
||||||
|
"volume": "20",
|
||||||
|
"journal-title": "Biomarkers"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib37",
|
||||||
|
"article-title": "Sensitization and serological biomarkers in knee osteoarthritis patients with different degrees of synovitis",
|
||||||
|
"author": "Petersen",
|
||||||
|
"year": "2015",
|
||||||
|
"journal-title": "Clin J Pain"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "309",
|
||||||
|
"author": "He",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib38",
|
||||||
|
"DOI": "10.1186/1471-2474-15-309",
|
||||||
|
"article-title": "Type X collagen levels are elevated in serum from human osteoarthritis patients and associated with biomarkers of cartilage degradation and inflammation",
|
||||||
|
"year": "2014",
|
||||||
|
"volume": "15",
|
||||||
|
"journal-title": "BMC Muscoskelet Disord"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "1",
|
||||||
|
"issue": "1",
|
||||||
|
"author": "Bay-Jensen",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib39",
|
||||||
|
"DOI": "10.1371/journal.pone.0054504",
|
||||||
|
"article-title": "Circulating protein fragments of cartilage and connective tissue degradation are diagnostic and prognostic markers of rheumatoid arthritis and ankylosing spondylitis",
|
||||||
|
"year": "2013",
|
||||||
|
"volume": "8",
|
||||||
|
"journal-title": "PLoS One"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "44",
|
||||||
|
"issue": "1",
|
||||||
|
"author": "Siebuhr",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib40",
|
||||||
|
"DOI": "10.1016/j.joca.2013.10.020",
|
||||||
|
"article-title": "Identification and characterisation of osteoarthritis patients with inflammation derived tissue turnover",
|
||||||
|
"year": "2014",
|
||||||
|
"volume": "22",
|
||||||
|
"journal-title": "Osteoarthr Cartil"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "221",
|
||||||
|
"issue": "3",
|
||||||
|
"author": "Yuji Yoshida",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib41",
|
||||||
|
"DOI": "10.1016/0306-9877(88)90147-8",
|
||||||
|
"article-title": "Interleukin 6 and rheumatoid arthritis",
|
||||||
|
"year": "1988",
|
||||||
|
"volume": "27",
|
||||||
|
"journal-title": "Med Hypotheses"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "5173",
|
||||||
|
"issue": "22",
|
||||||
|
"author": "Suthaus",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib42",
|
||||||
|
"DOI": "10.1182/blood-2011-09-377705",
|
||||||
|
"article-title": "HHV8 encoded viral IL-6 collaborates with mouse IL-6 in MCD-like development in mice",
|
||||||
|
"year": "2012",
|
||||||
|
"volume": "119",
|
||||||
|
"journal-title": "Blood"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "1",
|
||||||
|
"issue": "1",
|
||||||
|
"author": "Zhou",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib43",
|
||||||
|
"DOI": "10.1186/s12974-016-0607-6",
|
||||||
|
"article-title": "Interleukin-6: an emerging regulator of pathological pain",
|
||||||
|
"year": "2016",
|
||||||
|
"volume": "13",
|
||||||
|
"journal-title": "J Neuroinflammation"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib44",
|
||||||
|
"series-title": "Local Translation and Retrograde Axonal Transport of Creb Regulates Il-6-Induced Nociceptive Plasticity",
|
||||||
|
"first-page": "1",
|
||||||
|
"author": "Melemedjian",
|
||||||
|
"year": "2014"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"issue": "6",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib45",
|
||||||
|
"first-page": "1149",
|
||||||
|
"article-title": "Aggrecanase and Aggrecan degradation in osteoarthritis: a review",
|
||||||
|
"volume": "36",
|
||||||
|
"author": "Huang",
|
||||||
|
"year": "2008",
|
||||||
|
"journal-title": "J Int Med Res"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "133",
|
||||||
|
"issue": "1",
|
||||||
|
"author": "Troeberg",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib46",
|
||||||
|
"DOI": "10.1016/j.bbapap.2011.06.020",
|
||||||
|
"article-title": "Proteases involved in cartilage matrix degradation in osteoarthritis",
|
||||||
|
"year": "2012",
|
||||||
|
"volume": "1824",
|
||||||
|
"journal-title": "Biochim Biophys Acta Protein Proteonomics"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "1633",
|
||||||
|
"issue": "11",
|
||||||
|
"author": "Zhang",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib47",
|
||||||
|
"DOI": "10.1136/ard.2007.079574",
|
||||||
|
"article-title": "Differential Toll-like receptor-dependent collagenase expression in chondrocytes",
|
||||||
|
"year": "2008",
|
||||||
|
"volume": "67",
|
||||||
|
"journal-title": "Ann Rheum Dis"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "2152",
|
||||||
|
"issue": "7",
|
||||||
|
"author": "Kim",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib48",
|
||||||
|
"DOI": "10.1002/art.21951",
|
||||||
|
"article-title": "The catabolic pathway mediated by toll-like receptors in human osteoarthritic chondrocytes",
|
||||||
|
"year": "2006",
|
||||||
|
"volume": "54",
|
||||||
|
"journal-title": "Arthritis Rheum"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"issue": "2018",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib49",
|
||||||
|
"article-title": "The anti-ADAMTS-5 nanobody®, M6495, protects against cartilage breakdown in cartilage and synovial joint tissue explant models",
|
||||||
|
"volume": "26",
|
||||||
|
"author": "Siebuhr",
|
||||||
|
"year": "2018",
|
||||||
|
"journal-title": "Osteoarthr Cartil"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"doi-asserted-by": "crossref",
|
||||||
|
"first-page": "2933",
|
||||||
|
"issue": "11",
|
||||||
|
"author": "Miller",
|
||||||
|
"key": "10.1016/j.joca.2019.11.002_bib50",
|
||||||
|
"DOI": "10.1002/art.39291",
|
||||||
|
"article-title": "Damage-associated molecular patterns generated in osteoarthritis directly excite murine nociceptive neurons through toll-like receptor 4",
|
||||||
|
"year": "2015",
|
||||||
|
"volume": "67",
|
||||||
|
"journal-title": "Arthritis Rheum"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": [
|
||||||
|
"Inflammation and joint destruction may be linked to the generation of cartilage metabolites of ADAMTS-5 through activation of toll-like receptors"
|
||||||
|
],
|
||||||
|
"link": [
|
||||||
|
{
|
||||||
|
"URL": "https://api.elsevier.com/content/article/PII:S106345841931266X?httpAccept=text/xml",
|
||||||
|
"content-type": "text/xml",
|
||||||
|
"content-version": "vor",
|
||||||
|
"intended-application": "text-mining"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"URL": "https://api.elsevier.com/content/article/PII:S106345841931266X?httpAccept=text/plain",
|
||||||
|
"content-type": "text/plain",
|
||||||
|
"content-version": "vor",
|
||||||
|
"intended-application": "text-mining"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": "Crossref",
|
||||||
|
"type": "journal-article",
|
||||||
|
"publisher": "Elsevier BV",
|
||||||
|
"journal-issue": {
|
||||||
|
"issue": "5",
|
||||||
|
"published-print": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
5
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"volume": "28",
|
||||||
|
"references-count": 50,
|
||||||
|
"issn-type": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"value": "2227-9717",
|
||||||
|
"type": "electronic"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "VALUE",
|
||||||
|
"type": "PIPPO"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "1063-4584",
|
||||||
|
"type": "pu"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"assertion": [
|
||||||
|
{
|
||||||
|
"value": "Elsevier",
|
||||||
|
"name": "publisher",
|
||||||
|
"label": "This article is maintained by"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "Inflammation and joint destruction may be linked to the generation of cartilage metabolites of ADAMTS-5 through activation of toll-like receptors",
|
||||||
|
"name": "articletitle",
|
||||||
|
"label": "Article Title"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "Osteoarthritis and Cartilage",
|
||||||
|
"name": "journaltitle",
|
||||||
|
"label": "Journal Title"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "https://doi.org/10.1016/j.joca.2019.11.002",
|
||||||
|
"name": "articlelink",
|
||||||
|
"label": "CrossRef DOI link to publisher maintained version"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "article",
|
||||||
|
"name": "content_type",
|
||||||
|
"label": "Content Type"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "© 2019 Osteoarthritis Research Society International. Published by Elsevier Ltd.",
|
||||||
|
"name": "copyright",
|
||||||
|
"label": "Copyright"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"deposited": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2022,
|
||||||
|
7,
|
||||||
|
9
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"date-time": "2022-07-09T14:08:02Z",
|
||||||
|
"timestamp": 1657375682000
|
||||||
|
},
|
||||||
|
"language": "en",
|
||||||
|
"page": "658-668",
|
||||||
|
"short-container-title": [
|
||||||
|
"Osteoarthritis and Cartilage"
|
||||||
|
]
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,21 @@
|
||||||
|
package eu.dnetlib.dhp.collection.crossref
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
|
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||||
|
import org.junit.jupiter.api.BeforeEach
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
/** Test scaffold for the Crossref mapping.
  *
  * Declares no test cases of its own here; it only prepares the vocabulary
  * fixture inherited from `AbstractVocabularyTest` before each test.
  */
@ExtendWith(Array(classOf[MockitoExtension]))
class CrossrefMappingTest extends AbstractVocabularyTest {

  /** JSON mapper available to the test cases. */
  val mapper: ObjectMapper = new ObjectMapper()

  // NOTE(review): the logger is named after Crossref2Oaf rather than this
  // test class -- confirm this is intentional.
  val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)

  /** Delegates to the vocabulary setup of the parent class before every test. */
  @BeforeEach
  def setUp(): Unit = super.setUpVocabulary()

}
|
|
@ -0,0 +1,85 @@
|
||||||
|
package eu.dnetlib.dhp.collection.mag
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{Dataset, Publication, Result}
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.junit.jupiter.api.Assertions._
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
|
||||||
|
/** Tests for the mapping of MAG (Microsoft Academic Graph) records into OAF results. */
class MAGMappingTest {

  val mapper = new ObjectMapper()

  /** Manual driver that runs `SparkMagOrganizationAS.generateAS` on a local Spark session.
    *
    * It is NOT annotated with `@Test`, so JUnit does not execute it: it reads
    * from and writes to hard-coded paths on a developer machine and therefore
    * cannot run in a shared build.
    */
  def mappingTest(): Unit = {

    val spark = SparkSession
      .builder()
      .appName("Test")
      .master("local[*]")
      .getOrCreate()

    val s = new SparkMagOrganizationAS(null, null, null)

    s.generateAS(spark, "/home/sandro/Downloads/mag_test", "/home/sandro/Downloads/mag_AS")

  }

  /** Checks, for each MAG document type (and, for patents, the source name),
    * the concrete result class, the `invisible` flag and the instance type name
    * produced by `MagUtility.createResultFromType`.
    */
  @Test
  def mappingMagType(): Unit = {

    checkResult[Publication](MagUtility.createResultFromType(null, null), invisible = false, "Other literature type")
    checkResult[Publication](
      MagUtility.createResultFromType(Some("BookChapter"), null),
      invisible = false,
      "Part of book or chapter of book"
    )
    checkResult[Publication](MagUtility.createResultFromType(Some("Book"), null), invisible = false, "Book")
    checkResult[Publication](
      MagUtility.createResultFromType(Some("Repository"), null),
      invisible = true,
      "Other literature type"
    )
    checkResult[Publication](MagUtility.createResultFromType(Some("Thesis"), null), invisible = false, "Thesis")
    checkResult[Publication](MagUtility.createResultFromType(Some("Conference"), null), invisible = false, "Article")
    checkResult[Publication](MagUtility.createResultFromType(Some("Journal"), null), invisible = false, "Journal")
    checkResult[Dataset](MagUtility.createResultFromType(Some("Dataset"), null), invisible = false, "Dataset")
    checkResult[Publication](
      MagUtility.createResultFromType(Some("Patent"), Some("Patent Department of the Navy")),
      invisible = false,
      "Patent"
    )
    checkResult[Publication](
      MagUtility.createResultFromType(Some("Patent"), Some("Brevet Department of the Navy")),
      invisible = false,
      "Patent"
    )
    checkResult[Publication](
      MagUtility.createResultFromType(Some("Patent"), Some("Journal of the Navy")),
      invisible = false,
      "Journal"
    )
    checkResult[Publication](
      MagUtility.createResultFromType(Some("Patent"), Some("Proceedings of the Navy")),
      invisible = false,
      "Article"
    )
    checkResult[Dataset](MagUtility.createResultFromType(Some("Dataset"), null), invisible = false, "Dataset")
    assertNull(MagUtility.createResultFromType(Some("Patent"), null))
    assertNull(MagUtility.createResultFromType(Some("Patent"), Some("Some name ")))
  }

  /** Asserts that `r` is a non-null instance of `T` carrying the expected
    * visibility flag and instance type name.
    *
    * The runtime class of `T` is obtained through an implicit `ClassTag`:
    * a plain `r.isInstanceOf[T]` on an unbounded type parameter is erased and
    * is always true for a non-null value, so it would verify nothing.
    *
    * @param r         the result to inspect
    * @param invisible expected value of `dataInfo.invisible`
    * @param typeName  expected class name of the first instance's instance type
    * @param tag       runtime class evidence for `T`, supplied by the compiler
    * @tparam T the concrete result class `r` is expected to be
    */
  def checkResult[T](r: Result, invisible: Boolean, typeName: String)(implicit
    tag: scala.reflect.ClassTag[T]
  ): Unit = {

    assertNotNull(r)
    assertTrue(
      tag.runtimeClass.isInstance(r),
      s"expected an instance of ${tag.runtimeClass.getName} but got ${r.getClass.getName}"
    )
    assertNotNull(r.getDataInfo)
    assertEquals(invisible, r.getDataInfo.getInvisible)
    assertNotNull(r.getInstance())
    assertTrue(r.getInstance().size() > 0)
    assertNotNull(r.getInstance().get(0).getInstancetype)
    assertEquals(typeName, r.getInstance().get(0).getInstancetype.getClassname)

  }

}
|
|
@ -1,6 +1,26 @@
|
||||||
[
|
[
|
||||||
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the OAF Orcid transformed", "paramRequired": true},
|
{
|
||||||
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source path ", "paramRequired": false},
|
"paramName": "t",
|
||||||
{"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}
|
"paramLongName": "targetPath",
|
||||||
|
"paramDescription": "the path of the OAF Orcid transformed",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "i",
|
||||||
|
"paramLongName": "isLookupUrl",
|
||||||
|
"paramDescription": "the isLookup URL",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "s",
|
||||||
|
"paramLongName": "sourcePath",
|
||||||
|
"paramDescription": "the source path ",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "m",
|
||||||
|
"paramLongName": "master",
|
||||||
|
"paramDescription": "the master name",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
]
|
]
|
|
@ -1,5 +1,6 @@
|
||||||
package eu.dnetlib.doiboost.crossref
|
package eu.dnetlib.doiboost.crossref
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
import eu.dnetlib.dhp.schema.oaf._
|
import eu.dnetlib.dhp.schema.oaf._
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
|
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
|
||||||
|
@ -47,59 +48,6 @@ case object Crossref2Oaf {
|
||||||
json.extract[List[funderInfo]]
|
json.extract[List[funderInfo]]
|
||||||
}
|
}
|
||||||
|
|
||||||
val mappingCrossrefType = Map(
|
|
||||||
"book-section" -> "publication",
|
|
||||||
"book" -> "publication",
|
|
||||||
"book-chapter" -> "publication",
|
|
||||||
"book-part" -> "publication",
|
|
||||||
"book-series" -> "publication",
|
|
||||||
"book-set" -> "publication",
|
|
||||||
"book-track" -> "publication",
|
|
||||||
"edited-book" -> "publication",
|
|
||||||
"reference-book" -> "publication",
|
|
||||||
"monograph" -> "publication",
|
|
||||||
"journal-article" -> "publication",
|
|
||||||
"dissertation" -> "publication",
|
|
||||||
"other" -> "publication",
|
|
||||||
"peer-review" -> "publication",
|
|
||||||
"proceedings" -> "publication",
|
|
||||||
"proceedings-article" -> "publication",
|
|
||||||
"reference-entry" -> "publication",
|
|
||||||
"report" -> "publication",
|
|
||||||
"report-series" -> "publication",
|
|
||||||
"standard" -> "publication",
|
|
||||||
"standard-series" -> "publication",
|
|
||||||
"posted-content" -> "publication",
|
|
||||||
"dataset" -> "dataset"
|
|
||||||
)
|
|
||||||
|
|
||||||
val mappingCrossrefSubType = Map(
|
|
||||||
"book-section" -> "0013 Part of book or chapter of book",
|
|
||||||
"book" -> "0002 Book",
|
|
||||||
"book-chapter" -> "0013 Part of book or chapter of book",
|
|
||||||
"book-part" -> "0013 Part of book or chapter of book",
|
|
||||||
"book-series" -> "0002 Book",
|
|
||||||
"book-set" -> "0002 Book",
|
|
||||||
"book-track" -> "0002 Book",
|
|
||||||
"edited-book" -> "0002 Book",
|
|
||||||
"reference-book" -> "0002 Book",
|
|
||||||
"monograph" -> "0002 Book",
|
|
||||||
"journal-article" -> "0001 Article",
|
|
||||||
"dissertation" -> "0044 Thesis",
|
|
||||||
"other" -> "0038 Other literature type",
|
|
||||||
"peer-review" -> "0015 Review",
|
|
||||||
"proceedings" -> "0004 Conference object",
|
|
||||||
"proceedings-article" -> "0004 Conference object",
|
|
||||||
"reference-entry" -> "0013 Part of book or chapter of book",
|
|
||||||
"report" -> "0017 Report",
|
|
||||||
"report-series" -> "0017 Report",
|
|
||||||
"standard" -> "0038 Other literature type",
|
|
||||||
"standard-series" -> "0038 Other literature type",
|
|
||||||
"dataset" -> "0021 Dataset",
|
|
||||||
"preprint" -> "0016 Preprint",
|
|
||||||
"report" -> "0017 Report"
|
|
||||||
)
|
|
||||||
|
|
||||||
def getIrishId(doi: String): Option[String] = {
|
def getIrishId(doi: String): Option[String] = {
|
||||||
val id = doi.split("/").last
|
val id = doi.split("/").last
|
||||||
irishFunder
|
irishFunder
|
||||||
|
@ -107,7 +55,7 @@ case object Crossref2Oaf {
|
||||||
.map(f => f.id)
|
.map(f => f.id)
|
||||||
}
|
}
|
||||||
|
|
||||||
def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
|
def mappingResult(result: Result, json: JValue, instanceType: Qualifier, originalType: String): Result = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
|
|
||||||
//MAPPING Crossref DOI into PID
|
//MAPPING Crossref DOI into PID
|
||||||
|
@ -275,27 +223,20 @@ case object Crossref2Oaf {
|
||||||
instance.setAccessright(
|
instance.setAccessright(
|
||||||
decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
|
decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
|
||||||
)
|
)
|
||||||
instance.setInstancetype(
|
instance.setInstancetype(instanceType)
|
||||||
OafMapperUtils.qualifier(
|
|
||||||
cobjCategory.substring(0, 4),
|
|
||||||
cobjCategory.substring(5),
|
|
||||||
ModelConstants.DNET_PUBLICATION_RESOURCE,
|
|
||||||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
|
||||||
)
|
|
||||||
)
|
|
||||||
//ADD ORIGINAL TYPE to the mapping
|
//ADD ORIGINAL TYPE to the mapping
|
||||||
val itm = new InstanceTypeMapping
|
val itm = new InstanceTypeMapping
|
||||||
itm.setOriginalType(originalType)
|
itm.setOriginalType(originalType)
|
||||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||||
instance.setInstanceTypeMapping(List(itm).asJava)
|
instance.setInstanceTypeMapping(List(itm).asJava)
|
||||||
result.setResourcetype(
|
// result.setResourcetype(
|
||||||
OafMapperUtils.qualifier(
|
// OafMapperUtils.qualifier(
|
||||||
cobjCategory.substring(0, 4),
|
// cobjCategory.substring(0, 4),
|
||||||
cobjCategory.substring(5),
|
// cobjCategory.substring(5),
|
||||||
ModelConstants.DNET_PUBLICATION_RESOURCE,
|
// ModelConstants.DNET_PUBLICATION_RESOURCE,
|
||||||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
// ModelConstants.DNET_PUBLICATION_RESOURCE
|
||||||
)
|
// )
|
||||||
)
|
// )
|
||||||
|
|
||||||
instance.setCollectedfrom(createCrossrefCollectedFrom())
|
instance.setCollectedfrom(createCrossrefCollectedFrom())
|
||||||
if (StringUtils.isNotBlank(issuedDate)) {
|
if (StringUtils.isNotBlank(issuedDate)) {
|
||||||
|
@ -354,7 +295,40 @@ case object Crossref2Oaf {
|
||||||
a
|
a
|
||||||
}
|
}
|
||||||
|
|
||||||
def convert(input: String): List[Oaf] = {
|
/** Resolves a Crossref resource type against the OpenAIRE vocabularies.
  *
  * The `dnet:publication_resource` vocabulary is searched for a synonym of
  * `resourceType`; the match becomes the instance type. The class id of that
  * instance type is then looked up as a synonym in `dnet:result_typologies`
  * to select the main entity to generate:
  *  - publication
  *  - dataset
  *  - software
  *  - otherresearchproduct
  *
  * @param resourceType the type string found in the Crossref record; may be null or empty
  * @param vocabularies the vocabulary group used for both synonym lookups
  * @return the triple (instance type, result typology, original resource type),
  *         or `null` when `resourceType` is null/empty or either lookup finds no synonym
  */
def getTypeQualifier(
  resourceType: String,
  vocabularies: VocabularyGroup
): (Qualifier, Qualifier, String) = {
  if (resourceType == null || resourceType.isEmpty)
    null
  else {
    val instanceType =
      vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
    val typology =
      if (instanceType == null) null
      else
        vocabularies.getSynonymAsQualifier(
          ModelConstants.DNET_RESULT_TYPOLOGIES,
          instanceType.getClassid
        )
    // A missing typology is reported as "no mapping" (null) instead of a tuple
    // with a null element: callers dereference the typology right away
    // (generateItemFromType reads its class id) and would otherwise fail
    // with a NullPointerException.
    if (typology == null) null
    else (instanceType, typology, resourceType)
  }
}
|
||||||
|
|
||||||
|
def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json: json4s.JValue = parse(input)
|
lazy val json: json4s.JValue = parse(input)
|
||||||
|
|
||||||
|
@ -364,17 +338,16 @@ case object Crossref2Oaf {
|
||||||
val objectSubType = (json \ "subtype").extractOrElse[String](null)
|
val objectSubType = (json \ "subtype").extractOrElse[String](null)
|
||||||
if (objectType == null)
|
if (objectType == null)
|
||||||
return resultList
|
return resultList
|
||||||
|
val typology = getTypeQualifier(objectType, vocabularies)
|
||||||
|
|
||||||
val result = generateItemFromType(objectType, objectSubType)
|
if (typology == null)
|
||||||
|
return List()
|
||||||
|
|
||||||
|
val result = generateItemFromType(typology._2)
|
||||||
if (result == null)
|
if (result == null)
|
||||||
return List()
|
return List()
|
||||||
val cOBJCategory = mappingCrossrefSubType.getOrElse(
|
|
||||||
objectType,
|
|
||||||
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
|
|
||||||
)
|
|
||||||
|
|
||||||
val originalType = if (mappingCrossrefSubType.contains(objectType)) objectType else objectSubType
|
mappingResult(result, json, typology._1, typology._3)
|
||||||
mappingResult(result, json, cOBJCategory, originalType)
|
|
||||||
if (result == null || result.getId == null)
|
if (result == null || result.getId == null)
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
|
@ -392,7 +365,7 @@ case object Crossref2Oaf {
|
||||||
}
|
}
|
||||||
|
|
||||||
result match {
|
result match {
|
||||||
case publication: Publication => convertPublication(publication, json, cOBJCategory)
|
case publication: Publication => convertPublication(publication, json, typology._1)
|
||||||
case dataset: Dataset => convertDataset(dataset)
|
case dataset: Dataset => convertDataset(dataset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -622,12 +595,12 @@ case object Crossref2Oaf {
|
||||||
// TODO check if there are other info to map into the Dataset
|
// TODO check if there are other info to map into the Dataset
|
||||||
}
|
}
|
||||||
|
|
||||||
def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
|
def convertPublication(publication: Publication, json: JValue, cobjCategory: Qualifier): Unit = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
|
val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
|
||||||
|
|
||||||
//Mapping book
|
//Mapping book
|
||||||
if (cobjCategory.toLowerCase.contains("book")) {
|
if (cobjCategory.getClassname.toLowerCase.contains("book")) {
|
||||||
val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
|
val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
|
||||||
if (ISBN.nonEmpty && containerTitles.nonEmpty) {
|
if (ISBN.nonEmpty && containerTitles.nonEmpty) {
|
||||||
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
|
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
|
||||||
|
@ -708,12 +681,23 @@ case object Crossref2Oaf {
|
||||||
null
|
null
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateItemFromType(objectType: String, objectSubType: String): Result = {
|
def generateItemFromType(objectType: Qualifier): Result = {
|
||||||
if (mappingCrossrefType.contains(objectType)) {
|
if (objectType.getClassid.equalsIgnoreCase("publication")) {
|
||||||
if (mappingCrossrefType(objectType).equalsIgnoreCase("publication"))
|
val item = new Publication
|
||||||
return new Publication()
|
item.setResourcetype(objectType)
|
||||||
if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset"))
|
return item
|
||||||
return new Dataset()
|
} else if (objectType.getClassid.equalsIgnoreCase("dataset")) {
|
||||||
|
val item = new Dataset
|
||||||
|
item.setResourcetype(objectType)
|
||||||
|
return item
|
||||||
|
} else if (objectType.getClassid.equalsIgnoreCase("software")) {
|
||||||
|
val item = new Software
|
||||||
|
item.setResourcetype(objectType)
|
||||||
|
return item
|
||||||
|
} else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")) {
|
||||||
|
val item = new OtherResearchProduct
|
||||||
|
item.setResourcetype(objectType)
|
||||||
|
return item
|
||||||
}
|
}
|
||||||
null
|
null
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
package eu.dnetlib.doiboost.crossref
|
package eu.dnetlib.doiboost.crossref
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
import eu.dnetlib.dhp.schema.oaf
|
import eu.dnetlib.dhp.schema.oaf
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
|
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
||||||
import org.apache.commons.io.IOUtils
|
import org.apache.commons.io.IOUtils
|
||||||
import org.apache.spark.SparkConf
|
import org.apache.spark.SparkConf
|
||||||
import org.apache.spark.sql._
|
import org.apache.spark.sql._
|
||||||
|
@ -40,11 +42,16 @@ object SparkMapDumpIntoOAF {
|
||||||
implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]
|
implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]
|
||||||
|
|
||||||
val targetPath = parser.get("targetPath")
|
val targetPath = parser.get("targetPath")
|
||||||
|
val isLookupUrl: String = parser.get("isLookupUrl")
|
||||||
|
logger.info("isLookupUrl: {}", isLookupUrl)
|
||||||
|
val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
|
||||||
|
val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)
|
||||||
|
require(vocabularies != null)
|
||||||
|
|
||||||
spark.read
|
spark.read
|
||||||
.load(parser.get("sourcePath"))
|
.load(parser.get("sourcePath"))
|
||||||
.as[CrossrefDT]
|
.as[CrossrefDT]
|
||||||
.flatMap(k => Crossref2Oaf.convert(k.json))
|
.flatMap(k => Crossref2Oaf.convert(k.json, vocabularies))
|
||||||
.filter(o => o != null)
|
.filter(o => o != null)
|
||||||
.write
|
.write
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
|
|
|
@ -71,6 +71,7 @@ object UnpayWallToOAF {
|
||||||
}
|
}
|
||||||
|
|
||||||
def convertToOAF(input: String): Publication = {
|
def convertToOAF(input: String): Publication = {
|
||||||
|
|
||||||
val pub = new Publication
|
val pub = new Publication
|
||||||
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.aggregation;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.lenient;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
public abstract class AbstractVocabularyTest {
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
protected ISLookUpService isLookUpService;
|
||||||
|
|
||||||
|
protected VocabularyGroup vocabularies;
|
||||||
|
|
||||||
|
public void setUpVocabulary() throws ISLookUpException, IOException {
|
||||||
|
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||||
|
|
||||||
|
lenient()
|
||||||
|
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||||
|
.thenReturn(synonyms());
|
||||||
|
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> vocs() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/terms.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> synonyms() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/synonyms.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -57,7 +57,7 @@
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"type": "posted-content",
|
"type": "journal-article",
|
||||||
"URL": "http://dx.doi.org/10.1101/030080",
|
"URL": "http://dx.doi.org/10.1101/030080",
|
||||||
"is-referenced-by-count": 2,
|
"is-referenced-by-count": 2,
|
||||||
"link": [
|
"link": [
|
||||||
|
|
|
@ -1,33 +1,44 @@
|
||||||
package eu.dnetlib.dhp.doiboost.crossref
|
package eu.dnetlib.dhp.doiboost.crossref
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.SerializationFeature
|
||||||
|
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
import eu.dnetlib.dhp.schema.oaf._
|
import eu.dnetlib.dhp.schema.oaf._
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
import eu.dnetlib.doiboost.crossref.Crossref2Oaf
|
import eu.dnetlib.doiboost.crossref.Crossref2Oaf
|
||||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
|
||||||
import org.json4s
|
import org.json4s
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||||
import org.json4s.{DefaultFormats, JValue}
|
import org.json4s.{DefaultFormats, JValue}
|
||||||
import org.json4s.jackson.JsonMethods
|
import org.json4s.jackson.JsonMethods
|
||||||
import org.junit.jupiter.api.Assertions._
|
import org.junit.jupiter.api.Assertions._
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
|
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
import java.nio.file.Files
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
import scala.util.matching.Regex
|
import scala.util.matching.Regex
|
||||||
|
|
||||||
class CrossrefMappingTest {
|
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||||
|
class CrossrefMappingTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
||||||
val mapper = new ObjectMapper()
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
def setUp(): Unit = {
|
||||||
|
super.setUpVocabulary()
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testMissingAuthorParser(): Unit = {
|
def testMissingAuthorParser(): Unit = {
|
||||||
val json: String = Source
|
val json: String = Source
|
||||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json"))
|
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json"))
|
||||||
.mkString
|
.mkString
|
||||||
val result = Crossref2Oaf.convert(json)
|
val result = Crossref2Oaf.convert(json, vocabularies)
|
||||||
result
|
result
|
||||||
.filter(o => o.isInstanceOf[Publication])
|
.filter(o => o.isInstanceOf[Publication])
|
||||||
.map(p => p.asInstanceOf[Publication])
|
.map(p => p.asInstanceOf[Publication])
|
||||||
|
@ -50,13 +61,13 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
for (line <- funder_doi.linesWithSeparators.map(l => l.stripLineEnd)) {
|
for (line <- funder_doi.linesWithSeparators.map(l => l.stripLineEnd)) {
|
||||||
val json = template.replace("%s", line)
|
val json = template.replace("%s", line)
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
checkRelation(resultList)
|
checkRelation(resultList)
|
||||||
}
|
}
|
||||||
for (line <- funder_name.linesWithSeparators.map(l => l.stripLineEnd)) {
|
for (line <- funder_name.linesWithSeparators.map(l => l.stripLineEnd)) {
|
||||||
val json = template.replace("%s", line)
|
val json = template.replace("%s", line)
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
checkRelation(resultList)
|
checkRelation(resultList)
|
||||||
}
|
}
|
||||||
|
@ -96,7 +107,7 @@ class CrossrefMappingTest {
|
||||||
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
|
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty)
|
assertFalse(json.isEmpty)
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
val items = resultList.filter(p => p.isInstanceOf[Result])
|
val items = resultList.filter(p => p.isInstanceOf[Result])
|
||||||
|
@ -115,14 +126,13 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty)
|
assertFalse(json.isEmpty)
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
val items = resultList.filter(p => p.isInstanceOf[Result])
|
val items = resultList.filter(p => p.isInstanceOf[Result])
|
||||||
|
|
||||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
items.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
|
||||||
items.foreach(p => println(mapper.writeValueAsString(p)))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,7 +152,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty)
|
assertFalse(json.isEmpty)
|
||||||
|
|
||||||
val result: List[Oaf] = Crossref2Oaf.convert(json)
|
val result: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(result.nonEmpty)
|
assertTrue(result.nonEmpty)
|
||||||
|
|
||||||
|
@ -163,8 +173,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertEquals(doisReference.size, relationList.size)
|
assertEquals(doisReference.size, relationList.size)
|
||||||
|
|
||||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
relationList.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
|
||||||
relationList.foreach(p => println(mapper.writeValueAsString(p)))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -178,14 +187,13 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
val items = resultList.filter(p => p.isInstanceOf[Result])
|
val items = resultList.filter(p => p.isInstanceOf[Result])
|
||||||
|
|
||||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
items.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
|
||||||
items.foreach(p => println(mapper.writeValueAsString(p)))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -194,18 +202,17 @@ class CrossrefMappingTest {
|
||||||
val json = Source
|
val json = Source
|
||||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/prwTest.json"))
|
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/prwTest.json"))
|
||||||
.mkString
|
.mkString
|
||||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
|
||||||
|
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
val items = resultList.filter(p => p.isInstanceOf[Result])
|
val items = resultList.filter(p => p.isInstanceOf[Result])
|
||||||
|
|
||||||
items.foreach(p => logger.info(mapper.writeValueAsString(p)))
|
items.foreach(p => logger.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -235,7 +242,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty)
|
assertFalse(json.isEmpty)
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
val rels: List[Relation] =
|
val rels: List[Relation] =
|
||||||
|
@ -255,7 +262,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -266,7 +273,7 @@ class CrossrefMappingTest {
|
||||||
val result: Result = items.head.asInstanceOf[Result]
|
val result: Result = items.head.asInstanceOf[Result]
|
||||||
assertNotNull(result)
|
assertNotNull(result)
|
||||||
|
|
||||||
logger.info(mapper.writeValueAsString(result));
|
logger.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(result));
|
||||||
|
|
||||||
assertNotNull(result.getDataInfo, "Datainfo test not null Failed");
|
assertNotNull(result.getDataInfo, "Datainfo test not null Failed");
|
||||||
assertNotNull(
|
assertNotNull(
|
||||||
|
@ -331,7 +338,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -415,7 +422,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -463,7 +470,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -542,7 +549,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -568,7 +575,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -578,7 +585,8 @@ class CrossrefMappingTest {
|
||||||
assert(items.size == 1)
|
assert(items.size == 1)
|
||||||
val result: Result = items.head.asInstanceOf[Publication]
|
val result: Result = items.head.asInstanceOf[Publication]
|
||||||
assertNotNull(result)
|
assertNotNull(result)
|
||||||
logger.info(mapper.writeValueAsString(result));
|
|
||||||
|
logger.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -591,7 +599,7 @@ class CrossrefMappingTest {
|
||||||
val line: String =
|
val line: String =
|
||||||
"\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}],"
|
"\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}],"
|
||||||
val json = template.replace("%s", line)
|
val json = template.replace("%s", line)
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
||||||
val result: Result = items.head.asInstanceOf[Publication]
|
val result: Result = items.head.asInstanceOf[Publication]
|
||||||
|
@ -610,7 +618,7 @@ class CrossrefMappingTest {
|
||||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json"))
|
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json"))
|
||||||
.mkString
|
.mkString
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(template)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(template, vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
||||||
val result: Result = items.head.asInstanceOf[Publication]
|
val result: Result = items.head.asInstanceOf[Publication]
|
||||||
|
@ -634,14 +642,13 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result]
|
val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result]
|
||||||
|
|
||||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
|
||||||
println(mapper.writeValueAsString(item))
|
|
||||||
|
|
||||||
assertTrue(
|
assertTrue(
|
||||||
item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))
|
item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))
|
||||||
|
@ -664,7 +671,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -681,8 +688,8 @@ class CrossrefMappingTest {
|
||||||
assertTrue(
|
assertTrue(
|
||||||
item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
|
item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
|
||||||
)
|
)
|
||||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
|
||||||
println(mapper.writeValueAsString(item))
|
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -699,7 +706,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -716,8 +723,7 @@ class CrossrefMappingTest {
|
||||||
assertTrue(
|
assertTrue(
|
||||||
item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
|
item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
|
||||||
)
|
)
|
||||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
|
||||||
println(mapper.writeValueAsString(item))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -734,7 +740,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -751,8 +757,7 @@ class CrossrefMappingTest {
|
||||||
item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))
|
item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))
|
||||||
)
|
)
|
||||||
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null))
|
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null))
|
||||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
|
||||||
println(mapper.writeValueAsString(item))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -769,7 +774,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -786,8 +791,8 @@ class CrossrefMappingTest {
|
||||||
item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))
|
item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))
|
||||||
)
|
)
|
||||||
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null))
|
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null))
|
||||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
|
||||||
println(mapper.writeValueAsString(item))
|
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -802,7 +807,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -812,9 +817,8 @@ class CrossrefMappingTest {
|
||||||
assertEquals(1, item.getInstance().get(0).getUrl().size())
|
assertEquals(1, item.getInstance().get(0).getUrl().size())
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"https://doi.org/10.1016/j.jas.2019.105013",
|
"https://doi.org/10.1016/j.jas.2019.105013",
|
||||||
item.getInstance().get(0).getUrl().get(0)
|
item.getInstance().get(0).getUrl.get(0)
|
||||||
)
|
)
|
||||||
//println(mapper.writeValueAsString(item))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue