forked from D-Net/dnet-hadoop
update crossref mapping to be runnable separately as a single datasource outside doiboost
This commit is contained in:
parent
5281f010a5
commit
58dbe71d39
|
@ -5,8 +5,6 @@ import org.apache.commons.lang3.StringUtils;
|
|||
|
||||
public class DoiCleaningRule {
|
||||
|
||||
|
||||
|
||||
public static String clean(final String doi) {
|
||||
return doi
|
||||
.toLowerCase()
|
||||
|
@ -15,20 +13,19 @@ public class DoiCleaningRule {
|
|||
.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
|
||||
}
|
||||
|
||||
public static String normalizeDoi(final String input){
|
||||
public static String normalizeDoi(final String input) {
|
||||
if (input == null)
|
||||
return null;
|
||||
final String replaced = input
|
||||
.replaceAll("\\n|\\r|\\t|\\s", "")
|
||||
.toLowerCase()
|
||||
.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
|
||||
.replaceAll("\\n|\\r|\\t|\\s", "")
|
||||
.toLowerCase()
|
||||
.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
|
||||
if (StringUtils.isEmpty(replaced))
|
||||
return null;
|
||||
|
||||
if (!replaced.contains("10."))
|
||||
return null;
|
||||
|
||||
|
||||
final String ret = replaced.substring(replaced.indexOf("10."));
|
||||
|
||||
if (!ret.startsWith(CleaningFunctions.DOI_PREFIX))
|
||||
|
|
|
@ -165,7 +165,7 @@ public class OaiIterator implements Iterator<String> {
|
|||
} catch (final DocumentException e1) {
|
||||
final String resumptionToken = extractResumptionToken(xml);
|
||||
if (resumptionToken == null) {
|
||||
report.put(e1.getClass().getName(), e1.getMessage());
|
||||
report.put(e1.getClass().getName(), e1.getMessage());
|
||||
throw new CollectorException("Error parsing cleaned document:\n" + cleaned, e1);
|
||||
}
|
||||
return resumptionToken;
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
[
|
||||
{
|
||||
"paramName": "m",
|
||||
"paramLongName": "master",
|
||||
"paramDescription": "the master name",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "s",
|
||||
"paramLongName": "sourcePath",
|
||||
"paramDescription": "The base path of Crossref DUMP",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "t",
|
||||
"paramLongName": "targetPath",
|
||||
"paramDescription": "The target path",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "i",
|
||||
"paramLongName": "isLookupUrl",
|
||||
"paramDescription": "the Information System Service LookUp URL",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -0,0 +1,940 @@
|
|||
[
|
||||
{
|
||||
"id": "100007630",
|
||||
"uri": "http://dx.doi.org/10.13039/100007630",
|
||||
"name": "College of Engineering and Informatics, National University of Ireland, Galway",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100007731",
|
||||
"uri": "http://dx.doi.org/10.13039/100007731",
|
||||
"name": "Endo International",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100008099",
|
||||
"uri": "http://dx.doi.org/10.13039/100008099",
|
||||
"name": "Food Safety Authority of Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100008124",
|
||||
"uri": "http://dx.doi.org/10.13039/100008124",
|
||||
"name": "Department of Jobs, Enterprise and Innovation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100009098",
|
||||
"uri": "http://dx.doi.org/10.13039/100009098",
|
||||
"name": "Department of Foreign Affairs and Trade, Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100009099",
|
||||
"uri": "http://dx.doi.org/10.13039/100009099",
|
||||
"name": "Irish Aid",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100009770",
|
||||
"uri": "http://dx.doi.org/10.13039/100009770",
|
||||
"name": "National University of Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100009985",
|
||||
"uri": "http://dx.doi.org/10.13039/100009985",
|
||||
"name": "Parkinson's Association of Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100010399",
|
||||
"uri": "http://dx.doi.org/10.13039/100010399",
|
||||
"name": "European Society of Cataract and Refractive Surgeons",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100010414",
|
||||
"uri": "http://dx.doi.org/10.13039/100010414",
|
||||
"name": "Health Research Board",
|
||||
"synonym": [
|
||||
"501100001590"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "100010546",
|
||||
"uri": "http://dx.doi.org/10.13039/100010546",
|
||||
"name": "Deparment of Children and Youth Affairs, Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100010993",
|
||||
"uri": "http://dx.doi.org/10.13039/100010993",
|
||||
"name": "Irish Nephrology Society",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100011062",
|
||||
"uri": "http://dx.doi.org/10.13039/100011062",
|
||||
"name": "Asian Spinal Cord Network",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100011096",
|
||||
"uri": "http://dx.doi.org/10.13039/100011096",
|
||||
"name": "Jazz Pharmaceuticals",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100011396",
|
||||
"uri": "http://dx.doi.org/10.13039/100011396",
|
||||
"name": "Irish College of General Practitioners",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100012734",
|
||||
"uri": "http://dx.doi.org/10.13039/100012734",
|
||||
"name": "Department for Culture, Heritage and the Gaeltacht, Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100012754",
|
||||
"uri": "http://dx.doi.org/10.13039/100012754",
|
||||
"name": "Horizon Pharma",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100012891",
|
||||
"uri": "http://dx.doi.org/10.13039/100012891",
|
||||
"name": "Medical Research Charities Group",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100012919",
|
||||
"uri": "http://dx.doi.org/10.13039/100012919",
|
||||
"name": "Epilepsy Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100012920",
|
||||
"uri": "http://dx.doi.org/10.13039/100012920",
|
||||
"name": "GLEN",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100012921",
|
||||
"uri": "http://dx.doi.org/10.13039/100012921",
|
||||
"name": "Royal College of Surgeons in Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100013029",
|
||||
"uri": "http://dx.doi.org/10.13039/100013029",
|
||||
"name": "Iris O'Brien Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100013206",
|
||||
"uri": "http://dx.doi.org/10.13039/100013206",
|
||||
"name": "Food Institutional Research Measure",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100013381",
|
||||
"uri": "http://dx.doi.org/10.13039/100013381",
|
||||
"name": "Irish Phytochemical Food Network",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100013433",
|
||||
"uri": "http://dx.doi.org/10.13039/100013433",
|
||||
"name": "Transport Infrastructure Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100013461",
|
||||
"uri": "http://dx.doi.org/10.13039/100013461",
|
||||
"name": "Arts and Disability Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100013548",
|
||||
"uri": "http://dx.doi.org/10.13039/100013548",
|
||||
"name": "Filmbase",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100013917",
|
||||
"uri": "http://dx.doi.org/10.13039/100013917",
|
||||
"name": "Society for Musicology in Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100014251",
|
||||
"uri": "http://dx.doi.org/10.13039/100014251",
|
||||
"name": "Humanities in the European Research Area",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100014364",
|
||||
"uri": "http://dx.doi.org/10.13039/100014364",
|
||||
"name": "National Children's Research Centre",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100014384",
|
||||
"uri": "http://dx.doi.org/10.13039/100014384",
|
||||
"name": "Amarin Corporation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100014902",
|
||||
"uri": "http://dx.doi.org/10.13039/100014902",
|
||||
"name": "Irish Association for Cancer Research",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100015023",
|
||||
"uri": "http://dx.doi.org/10.13039/100015023",
|
||||
"name": "Ireland Funds",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100015037",
|
||||
"uri": "http://dx.doi.org/10.13039/100015037",
|
||||
"name": "Simon Cumbers Media Fund",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100015319",
|
||||
"uri": "http://dx.doi.org/10.13039/100015319",
|
||||
"name": "Sport Ireland Institute",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100015320",
|
||||
"uri": "http://dx.doi.org/10.13039/100015320",
|
||||
"name": "Paralympics Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100015442",
|
||||
"uri": "http://dx.doi.org/10.13039/100015442",
|
||||
"name": "Global Brain Health Institute",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100015776",
|
||||
"uri": "http://dx.doi.org/10.13039/100015776",
|
||||
"name": "Health and Social Care Board",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100015992",
|
||||
"uri": "http://dx.doi.org/10.13039/100015992",
|
||||
"name": "St. Luke's Institute of Cancer Research",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100017897",
|
||||
"uri": "http://dx.doi.org/10.13039/100017897",
|
||||
"name": "Friedreich\u2019s Ataxia Research Alliance Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100018064",
|
||||
"uri": "http://dx.doi.org/10.13039/100018064",
|
||||
"name": "Department of Tourism, Culture, Arts, Gaeltacht, Sport and Media",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100018172",
|
||||
"uri": "http://dx.doi.org/10.13039/100018172",
|
||||
"name": "Department of the Environment, Climate and Communications",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100018175",
|
||||
"uri": "http://dx.doi.org/10.13039/100018175",
|
||||
"name": "Dairy Processing Technology Centre",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100018270",
|
||||
"uri": "http://dx.doi.org/10.13039/100018270",
|
||||
"name": "Health Service Executive",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100018529",
|
||||
"uri": "http://dx.doi.org/10.13039/100018529",
|
||||
"name": "Alkermes",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100018542",
|
||||
"uri": "http://dx.doi.org/10.13039/100018542",
|
||||
"name": "Irish Endocrine Society",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100018754",
|
||||
"uri": "http://dx.doi.org/10.13039/100018754",
|
||||
"name": "An Roinn Sl\u00e1inte",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100018998",
|
||||
"uri": "http://dx.doi.org/10.13039/100018998",
|
||||
"name": "Irish Research eLibrary",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100019428",
|
||||
"uri": "http://dx.doi.org/10.13039/100019428",
|
||||
"name": "Nabriva Therapeutics",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100019637",
|
||||
"uri": "http://dx.doi.org/10.13039/100019637",
|
||||
"name": "Horizon Therapeutics",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100020174",
|
||||
"uri": "http://dx.doi.org/10.13039/100020174",
|
||||
"name": "Health Research Charities Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100020202",
|
||||
"uri": "http://dx.doi.org/10.13039/100020202",
|
||||
"name": "UCD Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100020233",
|
||||
"uri": "http://dx.doi.org/10.13039/100020233",
|
||||
"name": "Ireland Canada University Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100022943",
|
||||
"uri": "http://dx.doi.org/10.13039/100022943",
|
||||
"name": "National Cancer Registry Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001581",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001581",
|
||||
"name": "Arts Council of Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001582",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001582",
|
||||
"name": "Centre for Ageing Research and Development in Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001583",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001583",
|
||||
"name": "Cystinosis Foundation Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001584",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001584",
|
||||
"name": "Department of Agriculture, Food and the Marine, Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001586",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001586",
|
||||
"name": "Department of Education and Skills, Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001587",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001587",
|
||||
"name": "Economic and Social Research Institute",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001588",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001588",
|
||||
"name": "Enterprise Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001589",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001589",
|
||||
"name": "Environmental Protection Agency",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001591",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001591",
|
||||
"name": "Heritage Council",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001592",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001592",
|
||||
"name": "Higher Education Authority",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001593",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001593",
|
||||
"name": "Irish Cancer Society",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001594",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001594",
|
||||
"name": "Irish Heart Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001595",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001595",
|
||||
"name": "Irish Hospice Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001596",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001596",
|
||||
"name": "Irish Research Council for Science, Engineering and Technology",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001597",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001597",
|
||||
"name": "Irish Research Council for the Humanities and Social Sciences",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001598",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001598",
|
||||
"name": "Mental Health Commission",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001600",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001600",
|
||||
"name": "Research and Education Foundation, Sligo General Hospital",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001601",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001601",
|
||||
"name": "Royal Irish Academy",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001603",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001603",
|
||||
"name": "Sustainable Energy Authority of Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001604",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001604",
|
||||
"name": "Teagasc",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001627",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001627",
|
||||
"name": "Marine Institute",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001628",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001628",
|
||||
"name": "Central Remedial Clinic",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001629",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001629",
|
||||
"name": "Royal Dublin Society",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001630",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001630",
|
||||
"name": "Dublin Institute for Advanced Studies",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001631",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001631",
|
||||
"name": "University College Dublin",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001633",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001633",
|
||||
"name": "National University of Ireland, Maynooth",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001634",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001634",
|
||||
"name": "University of Galway",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001635",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001635",
|
||||
"name": "University of Limerick",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001636",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001636",
|
||||
"name": "University College Cork",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001637",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001637",
|
||||
"name": "Trinity College Dublin",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001638",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001638",
|
||||
"name": "Dublin City University",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100002081",
|
||||
"uri": "http://dx.doi.org/10.13039/501100002081",
|
||||
"name": "Irish Research Council",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100002736",
|
||||
"uri": "http://dx.doi.org/10.13039/501100002736",
|
||||
"name": "Covidien",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100002755",
|
||||
"uri": "http://dx.doi.org/10.13039/501100002755",
|
||||
"name": "Brennan and Company",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100002919",
|
||||
"uri": "http://dx.doi.org/10.13039/501100002919",
|
||||
"name": "Cork Institute of Technology",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100002959",
|
||||
"uri": "http://dx.doi.org/10.13039/501100002959",
|
||||
"name": "Dublin City Council",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100003036",
|
||||
"uri": "http://dx.doi.org/10.13039/501100003036",
|
||||
"name": "Perrigo Company Charitable Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100003037",
|
||||
"uri": "http://dx.doi.org/10.13039/501100003037",
|
||||
"name": "Elan",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100003496",
|
||||
"uri": "http://dx.doi.org/10.13039/501100003496",
|
||||
"name": "HeyStaks Technologies",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100003553",
|
||||
"uri": "http://dx.doi.org/10.13039/501100003553",
|
||||
"name": "Gaelic Athletic Association",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100003840",
|
||||
"uri": "http://dx.doi.org/10.13039/501100003840",
|
||||
"name": "Irish Institute of Clinical Neuroscience",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100003956",
|
||||
"uri": "http://dx.doi.org/10.13039/501100003956",
|
||||
"name": "Aspect Medical Systems",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100004162",
|
||||
"uri": "http://dx.doi.org/10.13039/501100004162",
|
||||
"name": "Meath Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100004210",
|
||||
"uri": "http://dx.doi.org/10.13039/501100004210",
|
||||
"name": "Our Lady's Children's Hospital, Crumlin",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100004321",
|
||||
"uri": "http://dx.doi.org/10.13039/501100004321",
|
||||
"name": "Shire",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100004981",
|
||||
"uri": "http://dx.doi.org/10.13039/501100004981",
|
||||
"name": "Athlone Institute of Technology",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100006518",
|
||||
"uri": "http://dx.doi.org/10.13039/501100006518",
|
||||
"name": "Department of Communications, Energy and Natural Resources, Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100006553",
|
||||
"uri": "http://dx.doi.org/10.13039/501100006553",
|
||||
"name": "Collaborative Centre for Applied Nanotechnology",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100006759",
|
||||
"uri": "http://dx.doi.org/10.13039/501100006759",
|
||||
"name": "CLARITY Centre for Sensor Web Technologies",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100009246",
|
||||
"uri": "http://dx.doi.org/10.13039/501100009246",
|
||||
"name": "Technological University Dublin",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100009269",
|
||||
"uri": "http://dx.doi.org/10.13039/501100009269",
|
||||
"name": "Programme of Competitive Forestry Research for Development",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100009315",
|
||||
"uri": "http://dx.doi.org/10.13039/501100009315",
|
||||
"name": "Cystinosis Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100010808",
|
||||
"uri": "http://dx.doi.org/10.13039/501100010808",
|
||||
"name": "Geological Survey of Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100011030",
|
||||
"uri": "http://dx.doi.org/10.13039/501100011030",
|
||||
"name": "Alimentary Glycoscience Research Cluster",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100011031",
|
||||
"uri": "http://dx.doi.org/10.13039/501100011031",
|
||||
"name": "Alimentary Health",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100011103",
|
||||
"uri": "http://dx.doi.org/10.13039/501100011103",
|
||||
"name": "Rann\u00eds",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100012354",
|
||||
"uri": "http://dx.doi.org/10.13039/501100012354",
|
||||
"name": "Inland Fisheries Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100014384",
|
||||
"uri": "http://dx.doi.org/10.13039/501100014384",
|
||||
"name": "X-Bolt Orthopaedics",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100014710",
|
||||
"uri": "http://dx.doi.org/10.13039/501100014710",
|
||||
"name": "PrecisionBiotics Group",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100014827",
|
||||
"uri": "http://dx.doi.org/10.13039/501100014827",
|
||||
"name": "Dormant Accounts Fund",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100016041",
|
||||
"uri": "http://dx.doi.org/10.13039/501100016041",
|
||||
"name": "St Vincents Anaesthesia Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100017501",
|
||||
"uri": "http://dx.doi.org/10.13039/501100017501",
|
||||
"name": "FotoNation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100018641",
|
||||
"uri": "http://dx.doi.org/10.13039/501100018641",
|
||||
"name": "Dairy Research Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100018839",
|
||||
"uri": "http://dx.doi.org/10.13039/501100018839",
|
||||
"name": "Irish Centre for High-End Computing",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100019905",
|
||||
"uri": "http://dx.doi.org/10.13039/501100019905",
|
||||
"name": "Galway University Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100020036",
|
||||
"uri": "http://dx.doi.org/10.13039/501100020036",
|
||||
"name": "Dystonia Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100020221",
|
||||
"uri": "http://dx.doi.org/10.13039/501100020221",
|
||||
"name": "Irish Motor Neurone Disease Association",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100020270",
|
||||
"uri": "http://dx.doi.org/10.13039/501100020270",
|
||||
"name": "Advanced Materials and Bioengineering Research",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100020403",
|
||||
"uri": "http://dx.doi.org/10.13039/501100020403",
|
||||
"name": "Irish Composites Centre",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100020425",
|
||||
"uri": "http://dx.doi.org/10.13039/501100020425",
|
||||
"name": "Irish Thoracic Society",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100021102",
|
||||
"uri": "http://dx.doi.org/10.13039/501100021102",
|
||||
"name": "Waterford Institute of Technology",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100021110",
|
||||
"uri": "http://dx.doi.org/10.13039/501100021110",
|
||||
"name": "Irish MPS Society",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100021525",
|
||||
"uri": "http://dx.doi.org/10.13039/501100021525",
|
||||
"name": "Insight SFI Research Centre for Data Analytics",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100021694",
|
||||
"uri": "http://dx.doi.org/10.13039/501100021694",
|
||||
"name": "Elan Pharma International",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100021838",
|
||||
"uri": "http://dx.doi.org/10.13039/501100021838",
|
||||
"name": "Royal College of Physicians of Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100022542",
|
||||
"uri": "http://dx.doi.org/10.13039/501100022542",
|
||||
"name": "Breakthrough Cancer Research",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100022610",
|
||||
"uri": "http://dx.doi.org/10.13039/501100022610",
|
||||
"name": "Breast Cancer Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100022728",
|
||||
"uri": "http://dx.doi.org/10.13039/501100022728",
|
||||
"name": "Munster Technological University",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100022729",
|
||||
"uri": "http://dx.doi.org/10.13039/501100022729",
|
||||
"name": "Institute of Technology, Tralee",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100023273",
|
||||
"uri": "http://dx.doi.org/10.13039/501100023273",
|
||||
"name": "HRB Clinical Research Facility Galway",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100023378",
|
||||
"uri": "http://dx.doi.org/10.13039/501100023378",
|
||||
"name": "Lauritzson Foundation",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100023551",
|
||||
"uri": "http://dx.doi.org/10.13039/501100023551",
|
||||
"name": "Cystic Fibrosis Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100023970",
|
||||
"uri": "http://dx.doi.org/10.13039/501100023970",
|
||||
"name": "Tyndall National Institute",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100024094",
|
||||
"uri": "http://dx.doi.org/10.13039/501100024094",
|
||||
"name": "Raidi\u00f3 Teilif\u00eds \u00c9ireann",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100024242",
|
||||
"uri": "http://dx.doi.org/10.13039/501100024242",
|
||||
"name": "Synthesis and Solid State Pharmaceutical Centre",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100024313",
|
||||
"uri": "http://dx.doi.org/10.13039/501100024313",
|
||||
"name": "Irish Rugby Football Union",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100007490",
|
||||
"uri": "http://dx.doi.org/10.13039/100007490",
|
||||
"name": "Bausch and Lomb Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100007819",
|
||||
"uri": "http://dx.doi.org/10.13039/100007819",
|
||||
"name": "Allergan",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100010547",
|
||||
"uri": "http://dx.doi.org/10.13039/100010547",
|
||||
"name": "Irish Youth Justice Service",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100012733",
|
||||
"uri": "http://dx.doi.org/10.13039/100012733",
|
||||
"name": "National Parks and Wildlife Service",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100015278",
|
||||
"uri": "http://dx.doi.org/10.13039/100015278",
|
||||
"name": "Pfizer Healthcare Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100017144",
|
||||
"uri": "http://dx.doi.org/10.13039/100017144",
|
||||
"name": "Shell E and P Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "100022895",
|
||||
"uri": "http://dx.doi.org/10.13039/100022895",
|
||||
"name": "Health Research Institute, University of Limerick",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100001599",
|
||||
"uri": "http://dx.doi.org/10.13039/501100001599",
|
||||
"name": "National Council for Forest Research and Development",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100006554",
|
||||
"uri": "http://dx.doi.org/10.13039/501100006554",
|
||||
"name": "IDA Ireland",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100011626",
|
||||
"uri": "http://dx.doi.org/10.13039/501100011626",
|
||||
"name": "Energy Policy Research Centre, Economic and Social Research Institute",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100014531",
|
||||
"uri": "http://dx.doi.org/10.13039/501100014531",
|
||||
"name": "Physical Education and Sport Sciences Department, University of Limerick",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100014745",
|
||||
"uri": "http://dx.doi.org/10.13039/501100014745",
|
||||
"name": "APC Microbiome Institute",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100014826",
|
||||
"uri": "http://dx.doi.org/10.13039/501100014826",
|
||||
"name": "ADAPT - Centre for Digital Content Technology",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100020570",
|
||||
"uri": "http://dx.doi.org/10.13039/501100020570",
|
||||
"name": "College of Medicine, Nursing and Health Sciences, National University of Ireland, Galway",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100020871",
|
||||
"uri": "http://dx.doi.org/10.13039/501100020871",
|
||||
"name": "Bernal Institute, University of Limerick",
|
||||
"synonym": []
|
||||
},
|
||||
{
|
||||
"id": "501100023852",
|
||||
"uri": "http://dx.doi.org/10.13039/501100023852",
|
||||
"name": "Moore Institute for Research in the Humanities and Social Studies, University of Galway",
|
||||
"synonym": []
|
||||
}
|
||||
]
|
|
@ -0,0 +1,44 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 </value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<value>/user/spark/spark2ApplicationHistory</value>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
</configuration>
|
|
@ -0,0 +1,54 @@
|
|||
<workflow-app name="generate_crossref_Datasource" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>The base path of the Crossref dump</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>targetPath</name>
|
||||
<description>The target path for the generated OAF output</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookupUrl</name>
|
||||
<description>The Information service Lookup URL</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="generateOAF"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
|
||||
|
||||
|
||||
<action name="generateOAF">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Crossref TO OAF</name>
|
||||
<class>eu.dnetlib.dhp.collection.crossref.SparkMapDumpIntoOAF</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=2g
|
||||
--conf spark.sql.shuffle.partitions=3000
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -1,10 +1,17 @@
|
|||
package eu.dnetlib.dhp.collection.crossref
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf._
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.{field, qualifier, structuredProperty, subject}
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{DoiCleaningRule, GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{
|
||||
DoiCleaningRule,
|
||||
GraphCleaningFunctions,
|
||||
IdentifierFactory,
|
||||
OafMapperUtils,
|
||||
PidType
|
||||
}
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.apache.commons.lang.StringUtils
|
||||
import org.json4s
|
||||
|
@ -37,18 +44,33 @@ case class funderInfo(id: String, uri: String, name: String, synonym: List[Strin
|
|||
|
||||
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
|
||||
|
||||
/** Serialized outcome of the Crossref mapping.
  *
  * @param oafType simple class name of the mapped entity; the Spark job uses it as partition column
  * @param body    the mapped entity serialized as a JSON string
  */
case class CrossrefResult(oafType: String, body: String)
|
||||
|
||||
case object Crossref2Oaf {
|
||||
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
||||
val mapper = new ObjectMapper
|
||||
|
||||
/** Funder descriptors loaded once from the bundled `irish_funder.json` classpath resource. */
val irishFunder: List[funderInfo] = {
  val source = Source.fromInputStream(
    getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/irish_funder.json")
  )
  // Read the whole resource, then release the underlying stream even if reading fails.
  val s =
    try source.mkString
    finally source.close()
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  lazy val json: org.json4s.JValue = parse(s)
  json.extract[List[funderInfo]]
}
|
||||
|
||||
val invalidName = List(
|
||||
",",
|
||||
"none none",
|
||||
"none, none",
|
||||
"none &na;",
|
||||
"(:null)",
|
||||
"test test test",
|
||||
"test test",
|
||||
"test",
|
||||
"&na; &na;"
|
||||
)
|
||||
|
||||
def getIrishId(doi: String): Option[String] = {
|
||||
val id = doi.split("/").last
|
||||
irishFunder
|
||||
|
@ -64,8 +86,9 @@ case object Crossref2Oaf {
|
|||
cf
|
||||
|
||||
}
|
||||
|
||||
/** Builds the default [[DataInfo]] for Crossref records, delegating to the
  * single-argument overload with trust "0.91".
  */
def generateDataInfo(): DataInfo = {
  generateDataInfo("0.91")
}
|
||||
|
||||
def generateDataInfo(trust: String): DataInfo = {
|
||||
|
@ -84,6 +107,7 @@ case object Crossref2Oaf {
|
|||
)
|
||||
di
|
||||
}
|
||||
|
||||
def getOpenAccessQualifier(): AccessRight = {
|
||||
|
||||
OafMapperUtils.accessRight(
|
||||
|
@ -102,6 +126,7 @@ case object Crossref2Oaf {
|
|||
ModelConstants.DNET_ACCESS_MODES
|
||||
)
|
||||
}
|
||||
|
||||
def getUnknownQualifier(): AccessRight = {
|
||||
OafMapperUtils.accessRight(
|
||||
ModelConstants.UNKNOWN,
|
||||
|
@ -138,16 +163,16 @@ case object Crossref2Oaf {
|
|||
//CC licenses
|
||||
if (
|
||||
license.startsWith("cc") ||
|
||||
license.startsWith("http://creativecommons.org/licenses") ||
|
||||
license.startsWith("https://creativecommons.org/licenses") ||
|
||||
license.startsWith("http://creativecommons.org/licenses") ||
|
||||
license.startsWith("https://creativecommons.org/licenses") ||
|
||||
|
||||
//ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
|
||||
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
|
||||
license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
|
||||
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
|
||||
//ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
|
||||
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
|
||||
license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
|
||||
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
|
||||
|
||||
//APA (considered OPEN also by Unpaywall)
|
||||
license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")
|
||||
//APA (considered OPEN also by Unpaywall)
|
||||
license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")
|
||||
) {
|
||||
|
||||
val oaq: AccessRight = getOpenAccessQualifier()
|
||||
|
@ -197,23 +222,92 @@ case object Crossref2Oaf {
|
|||
|
||||
}
|
||||
|
||||
/** Tells whether an author full name is usable.
  *
  * A name is rejected when it is null, empty, or when its lower-cased and
  * trimmed form is one of the placeholder values listed in `invalidName`.
  *
  * @param fullName the author full name to check (may be null)
  * @return true when the name passes all the checks
  */
def isValidAuthorName(fullName: String): Boolean = {
  val hasText = fullName != null && fullName.nonEmpty
  hasText && !invalidName.contains(fullName.toLowerCase.trim)
}
|
||||
|
||||
/** Decides whether a mapped Crossref result should be kept.
  *
  * A result is discarded when:
  *  - it is null or has no identifier;
  *  - it has no title, or every title is null, empty or "[NO TITLE AVAILABLE]";
  *  - its publisher is one of the Crossref test accounts (#4360);
  *  - it has authors but none of them has a valid name (see `isValidAuthorName`);
  *  - it matches the known bogus "Addie Jackson" / "Elsevier BV" records (#4368).
  *
  * Results without any author are accepted (relaxed constraint).
  *
  * @param publication the mapped result to validate (may be null)
  * @return true when the result must be kept, false when it must be dropped
  */
def filterResult(publication: Result): Boolean = {
  //Case empty publication
  if (publication == null)
    return false
  if (publication.getId == null || publication.getId.isEmpty)
    return false

  //Case publication with no title
  if (publication.getTitle == null || publication.getTitle.size == 0)
    return false

  val validTitles = publication.getTitle.asScala.count(p =>
    p.getValue != null
    && p.getValue.nonEmpty && !p.getValue.equalsIgnoreCase("[NO TITLE AVAILABLE]")
  )

  if (validTitles == 0)
    return false

  // fixes #4360 (test publisher)
  val publisher =
    if (publication.getPublisher != null) publication.getPublisher.getValue else null

  if (
    publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher
      .equalsIgnoreCase("CrossRef Test Account"))
  ) {
    return false
  }

  //RELAXED this constraint
  //Publication with no Author
  if (publication.getAuthor == null || publication.getAuthor.size() == 0)
    return true

  //filter invalid author
  val authors = publication.getAuthor.asScala.map(s => {
    // The full name may be missing: guard against null before testing for emptiness.
    val fullName = s.getFullname
    if (fullName != null && fullName.nonEmpty)
      fullName
    else
      s"${s.getName} ${s.getSurname}"
  })

  if (!authors.exists(isValidAuthorName))
    return false

  // fixes #4368
  // Compare against the null-safe publisher computed above: equalsIgnoreCase(null)
  // is false, so a missing publisher no longer raises a NullPointerException.
  if (authors.exists(_.equalsIgnoreCase("Addie Jackson")) && "Elsevier BV".equalsIgnoreCase(publisher))
    return false

  true
}
|
||||
|
||||
def mappingResult(result: Result, json: JValue, instanceType: Qualifier, originalType: String): Result = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
|
||||
//MAPPING Crossref DOI into PID
|
||||
val doi: String = DoiCleaningRule.normalizeDoi((json \ "DOI").extract[String])
|
||||
result.setPid(List(structuredProperty(
|
||||
doi,
|
||||
qualifier(
|
||||
PidType.doi.toString,
|
||||
PidType.doi.toString,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES
|
||||
),
|
||||
null
|
||||
)).asJava)
|
||||
result.setPid(
|
||||
List(
|
||||
structuredProperty(
|
||||
doi,
|
||||
qualifier(
|
||||
PidType.doi.toString,
|
||||
PidType.doi.toString,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES
|
||||
),
|
||||
null
|
||||
)
|
||||
).asJava
|
||||
)
|
||||
|
||||
//MAPPING Crossref DOI into OriginalId
|
||||
//and Other Original Identifier of dataset like clinical-trial-number
|
||||
|
@ -251,7 +345,9 @@ case object Crossref2Oaf {
|
|||
val subtitles =
|
||||
for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
|
||||
title,
|
||||
ModelConstants.SUBTITLE_QUALIFIER, null)
|
||||
ModelConstants.SUBTITLE_QUALIFIER,
|
||||
null
|
||||
)
|
||||
result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
|
||||
|
||||
// DESCRIPTION
|
||||
|
@ -302,9 +398,9 @@ case object Crossref2Oaf {
|
|||
(json \ "issued" \ "date-parts").extract[List[List[Int]]]
|
||||
)
|
||||
if (StringUtils.isNotBlank(issuedDate)) {
|
||||
result.setDateofacceptance(field(issuedDate,null))
|
||||
result.setDateofacceptance(field(issuedDate, null))
|
||||
} else {
|
||||
result.setDateofacceptance(field(createdDate.getValue,null))
|
||||
result.setDateofacceptance(field(createdDate.getValue, null))
|
||||
}
|
||||
result.setRelevantdate(
|
||||
List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate)
|
||||
|
@ -317,7 +413,7 @@ case object Crossref2Oaf {
|
|||
|
||||
if (subjectList.nonEmpty) {
|
||||
result.setSubject(
|
||||
subjectList.map(s => subject(s,ModelConstants.SUBTITLE_QUALIFIER,null)).asJava
|
||||
subjectList.map(s => subject(s, ModelConstants.SUBTITLE_QUALIFIER, null)).asJava
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -339,7 +435,7 @@ case object Crossref2Oaf {
|
|||
JObject(license) <- json \ "license"
|
||||
JField("URL", JString(lic)) <- license
|
||||
JField("content-version", JString(content_version)) <- license
|
||||
} yield (field[String](lic,null), content_version)
|
||||
} yield (field[String](lic, null), content_version)
|
||||
val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue))
|
||||
if (l.nonEmpty) {
|
||||
if (l exists (d => d._2.equals("vor"))) {
|
||||
|
@ -382,9 +478,9 @@ case object Crossref2Oaf {
|
|||
|
||||
instance.setCollectedfrom(createCrossrefCollectedFrom())
|
||||
if (StringUtils.isNotBlank(issuedDate)) {
|
||||
instance.setDateofacceptance(field(issuedDate,null))
|
||||
instance.setDateofacceptance(field(issuedDate, null))
|
||||
} else {
|
||||
instance.setDateofacceptance(field(createdDate.getValue,null))
|
||||
instance.setDateofacceptance(field(createdDate.getValue, null))
|
||||
}
|
||||
val s: List[String] = List("https://doi.org/" + doi)
|
||||
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
|
||||
|
@ -415,6 +511,7 @@ case object Crossref2Oaf {
|
|||
else
|
||||
result
|
||||
}
|
||||
|
||||
def generateIdentifier(oaf: Result, doi: String): String = {
|
||||
val id = DHPUtils.md5(doi.toLowerCase)
|
||||
s"50|doiboost____|$id"
|
||||
|
@ -431,7 +528,12 @@ case object Crossref2Oaf {
|
|||
List(
|
||||
structuredProperty(
|
||||
orcid,
|
||||
qualifier( ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES),
|
||||
qualifier(
|
||||
ModelConstants.ORCID_PENDING,
|
||||
ModelConstants.ORCID_PENDING,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES
|
||||
),
|
||||
generateDataInfo()
|
||||
)
|
||||
).asJava
|
||||
|
@ -441,22 +543,22 @@ case object Crossref2Oaf {
|
|||
}
|
||||
|
||||
/** *
|
||||
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
|
||||
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
|
||||
* to generate one of the following main entities:
|
||||
* - publication
|
||||
* - dataset
|
||||
* - software
|
||||
* - otherresearchproduct
|
||||
*
|
||||
* @param resourceType
|
||||
* @param vocabularies
|
||||
* @return
|
||||
*/
|
||||
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
|
||||
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
|
||||
* to generate one of the following main entities:
|
||||
* - publication
|
||||
* - dataset
|
||||
* - software
|
||||
* - otherresearchproduct
|
||||
*
|
||||
* @param resourceType
|
||||
* @param vocabularies
|
||||
* @return
|
||||
*/
|
||||
def getTypeQualifier(
|
||||
resourceType: String,
|
||||
vocabularies: VocabularyGroup
|
||||
): (Qualifier, Qualifier, String) = {
|
||||
resourceType: String,
|
||||
vocabularies: VocabularyGroup
|
||||
): (Qualifier, Qualifier, String) = {
|
||||
if (resourceType != null && resourceType.nonEmpty) {
|
||||
val typeQualifier =
|
||||
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
|
||||
|
@ -473,17 +575,16 @@ case object Crossref2Oaf {
|
|||
null
|
||||
}
|
||||
|
||||
def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
|
||||
def convert(input: String, vocabularies: VocabularyGroup): List[CrossrefResult] = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json: json4s.JValue = parse(input)
|
||||
|
||||
var resultList: List[Oaf] = List()
|
||||
var resultList: List[CrossrefResult] = List()
|
||||
|
||||
val objectType = (json \ "type").extractOrElse[String](null)
|
||||
val objectSubType = (json \ "subtype").extractOrElse[String](null)
|
||||
if (objectType == null)
|
||||
return resultList
|
||||
val typology =getTypeQualifier(objectType, vocabularies)
|
||||
val typology = getTypeQualifier(objectType, vocabularies)
|
||||
|
||||
if (typology == null)
|
||||
return List()
|
||||
|
@ -507,7 +608,7 @@ case object Crossref2Oaf {
|
|||
createCrossrefCollectedFrom(),
|
||||
result.getDataInfo,
|
||||
result.getLastupdatetimestamp
|
||||
)
|
||||
).map(s => CrossrefResult(s.getClass.getSimpleName, mapper.writeValueAsString(s)))
|
||||
}
|
||||
|
||||
result match {
|
||||
|
@ -522,10 +623,15 @@ case object Crossref2Oaf {
|
|||
|
||||
if (doisReference != null && doisReference.nonEmpty) {
|
||||
val citation_relations: List[Relation] = generateCitationRelations(doisReference, result)
|
||||
resultList = resultList ::: citation_relations
|
||||
resultList = resultList ::: citation_relations.map(s =>
|
||||
CrossrefResult(s.getClass.getSimpleName, mapper.writeValueAsString(s))
|
||||
)
|
||||
}
|
||||
resultList = resultList ::: List(result)
|
||||
resultList
|
||||
if (!filterResult(result))
|
||||
List()
|
||||
else
|
||||
resultList ::: List(result).map(s => CrossrefResult(s.getClass.getSimpleName, mapper.writeValueAsString(s)))
|
||||
|
||||
}
|
||||
|
||||
private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = {
|
||||
|
@ -752,10 +858,10 @@ case object Crossref2Oaf {
|
|||
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
|
||||
if (publication.getSource != null) {
|
||||
val l: List[Field[String]] = publication.getSource.asScala.toList
|
||||
val ll: List[Field[String]] = l ::: List(field(source,null))
|
||||
val ll: List[Field[String]] = l ::: List(field(source, null))
|
||||
publication.setSource(ll.asJava)
|
||||
} else
|
||||
publication.setSource(List(field(source,null)).asJava)
|
||||
publication.setSource(List(field(source, null)).asJava)
|
||||
}
|
||||
} else {
|
||||
// Mapping Journal
|
||||
|
@ -823,7 +929,7 @@ case object Crossref2Oaf {
|
|||
): StructuredProperty = {
|
||||
val dp = extractDate(dt, datePart)
|
||||
if (StringUtils.isNotBlank(dp))
|
||||
return structuredProperty(dp, qualifier(classId,classId, schemeId, schemeId),null)
|
||||
return structuredProperty(dp, qualifier(classId, classId, schemeId, schemeId), null)
|
||||
null
|
||||
}
|
||||
|
||||
|
@ -836,12 +942,11 @@ case object Crossref2Oaf {
|
|||
val item = new Dataset
|
||||
item.setResourcetype(objectType)
|
||||
return item
|
||||
}
|
||||
else if (objectType.getClassid.equalsIgnoreCase("software")){
|
||||
} else if (objectType.getClassid.equalsIgnoreCase("software")) {
|
||||
val item = new Software
|
||||
item.setResourcetype(objectType)
|
||||
return item
|
||||
}else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")){
|
||||
} else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")) {
|
||||
val item = new OtherResearchProduct
|
||||
item.setResourcetype(objectType)
|
||||
return item
|
||||
|
|
|
@ -1,106 +0,0 @@
|
|||
package eu.dnetlib.dhp.collection.crossref
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.hadoop.io.{IntWritable, Text}
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.expressions.Aggregator
|
||||
import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession}
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
/** Spark job that merges an incremental Crossref update into the existing
  * Crossref dataset, keeping for every DOI the record with the highest
  * indexed timestamp.
  */
object CrossrefDataset {

  val logger: Logger = LoggerFactory.getLogger(CrossrefDataset.getClass)

  /** Wraps a raw Crossref JSON record into a [[CrossrefDT]].
    *
    * @param input the Crossref record as a JSON string
    * @return the normalized DOI, the untouched JSON and its `indexed.timestamp`
    */
  def to_item(input: String): CrossrefDT = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: json4s.JValue = parse(input)
    val indexedTimestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
    val normalizedDoi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
    CrossrefDT(normalizedDoi, input, indexedTimestamp)
  }

  /** Entry point: reads `crossref_ds` and `index_update` under the working path
    * and writes the merged result into `crossref_ds_updated`.
    */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(
      IOUtils.toString(
        CrossrefDataset.getClass.getResourceAsStream(
          "/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json"
        )
      )
    )
    parser.parseArgument(args)
    val spark: SparkSession =
      SparkSession
        .builder()
        .config(conf)
        .appName(CrossrefDataset.getClass.getSimpleName)
        .master(parser.get("master"))
        .getOrCreate()
    import spark.implicits._

    // Keeps, between two records, the one with the strictly greater timestamp;
    // null is the neutral element.
    val crossrefAggregator = new Aggregator[CrossrefDT, CrossrefDT, CrossrefDT] with Serializable {

      override def zero: CrossrefDT = null

      override def reduce(b: CrossrefDT, a: CrossrefDT): CrossrefDT =
        if (b == null) a
        else if (a == null) b
        else if (a.timestamp > b.timestamp) a
        else b

      override def merge(a: CrossrefDT, b: CrossrefDT): CrossrefDT =
        if (b == null) a
        else if (a == null) b
        else if (a.timestamp > b.timestamp) a
        else b

      override def bufferEncoder: Encoder[CrossrefDT] = implicitly[Encoder[CrossrefDT]]

      override def outputEncoder: Encoder[CrossrefDT] = implicitly[Encoder[CrossrefDT]]

      override def finish(reduction: CrossrefDT): CrossrefDT = reduction
    }

    val workingPath: String = parser.get("workingPath")

    val currentRecords: Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT]

    // The update is a sequence file of compressed blobs: decompress, then parse each record.
    val updatedRecords =
      spark.createDataset(
        spark.sparkContext
          .sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text])
          .map(i => CrossrefImporter.decompressBlob(i._2.toString))
          .map(i => to_item(i))
      )

    val latestByDoi = currentRecords
      .union(updatedRecords)
      .groupByKey(_.doi)
      .agg(crossrefAggregator.toColumn)
      .map(s => s._2)

    latestByDoi.write
      .mode(SaveMode.Overwrite)
      .save(s"$workingPath/crossref_ds_updated")

  }

}
|
|
@ -61,6 +61,6 @@ object GenerateCrossrefDataset {
|
|||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(targetPath)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
package eu.dnetlib.dhp.collection.crossref
|
||||
|
||||
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Dataset => OafDataset}
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
||||
import org.apache.spark.sql._
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
/** Spark application converting a Crossref dump into serialized OAF records.
  *
  * @param propertyPath classpath location of the JSON file describing the accepted arguments
  * @param args         the command line arguments
  * @param log          the logger used by the application
  */
class SparkMapDumpIntoOAF(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Reads the job parameters, loads the vocabularies from the Information
    * Service and runs the conversion.
    */
  override def run(): Unit = {
    val sourcePath = parser.get("sourcePath")
    log.info("sourcePath: {}", sourcePath)
    val targetPath = parser.get("targetPath")
    log.info("targetPath: {}", targetPath)
    val isLookupUrl: String = parser.get("isLookupUrl")
    log.info("isLookupUrl: {}", isLookupUrl)

    val lookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
    val vocabularies = VocabularyGroup.loadVocsFromIS(lookupService)
    require(vocabularies != null)

    transformCrossref(spark, sourcePath, targetPath, vocabularies)
  }

  /** Maps every line of the dump through `Crossref2Oaf.convert` and writes the
    * results as gzip-compressed text, partitioned by the `oafType` column.
    *
    * @param spark        the active Spark session
    * @param sourcePath   path of the text dump, one record per line
    * @param targetPath   output path; any existing content is overwritten
    * @param vocabularies vocabularies handed to the mapping
    */
  def transformCrossref(
    spark: SparkSession,
    sourcePath: String,
    targetPath: String,
    vocabularies: VocabularyGroup
  ): Unit = {
    import spark.implicits._

    val records = spark.read.text(sourcePath).as[String]
    val mapped = records.flatMap(line => Crossref2Oaf.convert(line, vocabularies))

    mapped.write
      .mode(SaveMode.Overwrite)
      .partitionBy("oafType")
      .option("compression", "gzip")
      .text(targetPath)
  }
}
|
||||
|
||||
/** Launcher of the [[SparkMapDumpIntoOAF]] application. */
object SparkMapDumpIntoOAF {

  /** Creates the application with its argument description, initializes it and runs it.
    *
    * @param args the command line arguments forwarded to the application
    */
  def main(args: Array[String]): Unit = {
    val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass)
    val application = new SparkMapDumpIntoOAF(
      "/eu/dnetlib/dhp/collection/crossref/convert_crossref_dump_to_oaf_params.json",
      args,
      logger
    )
    application.initialize().run()
  }

}
|
|
@ -14,7 +14,6 @@ import scala.collection.JavaConverters._
|
|||
|
||||
case class MAGPaper(
|
||||
paperId: Option[Long],
|
||||
rank: Option[Int],
|
||||
doi: Option[String],
|
||||
docType: Option[String],
|
||||
paperTitle: Option[String],
|
||||
|
@ -29,7 +28,6 @@ case class MAGPaper(
|
|||
journalName: Option[String],
|
||||
journalIssn: Option[String],
|
||||
journalPublisher: Option[String],
|
||||
journalWebpage: Option[String],
|
||||
conferenceSeriesId: Option[Long],
|
||||
conferenceInstanceId: Option[Long],
|
||||
conferenceName: Option[String],
|
||||
|
@ -51,8 +49,6 @@ case class MAGPaper(
|
|||
abstractText: Option[String],
|
||||
// List of authors
|
||||
authors: Option[List[MAGAuthor]],
|
||||
// List of Fields of Study
|
||||
fos: Option[List[MAGFieldOfStudy]],
|
||||
urls: Option[List[String]]
|
||||
)
|
||||
|
||||
|
@ -65,13 +61,6 @@ case class MAGAuthor(
|
|||
GridId: Option[String]
|
||||
)
|
||||
|
||||
case class MAGFieldOfStudy(
|
||||
FieldOfStudyId: Option[Long],
|
||||
DisplayName: Option[String],
|
||||
MainType: Option[String],
|
||||
Score: Option[Double]
|
||||
)
|
||||
|
||||
object MagUtility extends Serializable {
|
||||
|
||||
val datatypedict = Map(
|
||||
|
|
|
@ -62,9 +62,7 @@ class SparkCreateMagDenormalizedTable(propertyPath: String, args: Array[String],
|
|||
//next step we create a table containing
|
||||
val authors = MagUtility.loadMagEntity(spark, "Authors", magBasePath)
|
||||
val affiliations = MagUtility.loadMagEntity(spark, "Affiliations", magBasePath)
|
||||
val paaf = MagUtility.loadMagEntity(spark, "PaperAuthorAffiliations", magBasePath)
|
||||
|
||||
val paperAuthorAffiliations = paaf.join(step0, paaf("PaperId") === step0("PaperId"), "leftsemi")
|
||||
val paperAuthorAffiliations = MagUtility.loadMagEntity(spark, "PaperAuthorAffiliations", magBasePath)
|
||||
|
||||
val j1 = paperAuthorAffiliations
|
||||
.join(authors, paperAuthorAffiliations("AuthorId") === authors("AuthorId"), "inner")
|
||||
|
@ -116,26 +114,26 @@ class SparkCreateMagDenormalizedTable(propertyPath: String, args: Array[String],
|
|||
step2.count()
|
||||
step1.unpersist()
|
||||
|
||||
val fos = MagUtility
|
||||
.loadMagEntity(spark, "FieldsOfStudy", magBasePath)
|
||||
.select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType")
|
||||
|
||||
val paperFieldsOfStudy = MagUtility
|
||||
.loadMagEntity(spark, "PaperFieldsOfStudy", magBasePath)
|
||||
.select($"FieldOfStudyId", $"Score", $"PaperId")
|
||||
|
||||
val paperFoS = paperFieldsOfStudy
|
||||
.join(broadcast(fos), fos("fos") === paperFieldsOfStudy("FieldOfStudyId"))
|
||||
.groupBy("PaperId")
|
||||
.agg(collect_set(struct("FieldOfStudyId", "DisplayName", "MainType", "Score")).as("FoS"))
|
||||
|
||||
val step3 = step2
|
||||
.join(paperFoS, step2("PaperId") === paperFoS("PaperId"), "left")
|
||||
.select(step2("*"), paperFoS("FoS"))
|
||||
.cache()
|
||||
step3.count()
|
||||
|
||||
step2.unpersist()
|
||||
// val fos = MagUtility
|
||||
// .loadMagEntity(spark, "FieldsOfStudy", magBasePath)
|
||||
// .select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType")
|
||||
//
|
||||
// val paperFieldsOfStudy = MagUtility
|
||||
// .loadMagEntity(spark, "PaperFieldsOfStudy", magBasePath)
|
||||
// .select($"FieldOfStudyId", $"Score", $"PaperId")
|
||||
//
|
||||
// val paperFoS = paperFieldsOfStudy
|
||||
// .join(broadcast(fos), fos("fos") === paperFieldsOfStudy("FieldOfStudyId"))
|
||||
// .groupBy("PaperId")
|
||||
// .agg(collect_set(struct("FieldOfStudyId", "DisplayName", "MainType", "Score")).as("FoS"))
|
||||
//
|
||||
// val step3 = step2
|
||||
// .join(paperFoS, step2("PaperId") === paperFoS("PaperId"), "left")
|
||||
// .select(step2("*"), paperFoS("FoS"))
|
||||
// .cache()
|
||||
// step3.count()
|
||||
//
|
||||
// step2.unpersist()
|
||||
|
||||
val journals = MagUtility
|
||||
.loadMagEntity(spark, "Journals", magBasePath)
|
||||
|
@ -143,20 +141,18 @@ class SparkCreateMagDenormalizedTable(propertyPath: String, args: Array[String],
|
|||
$"JournalId",
|
||||
$"DisplayName".as("journalName"),
|
||||
$"Issn".as("journalIssn"),
|
||||
$"Publisher".as("journalPublisher"),
|
||||
$"Webpage".as("journalWebpage")
|
||||
$"Publisher".as("journalPublisher")
|
||||
)
|
||||
val step4 = step3
|
||||
.join(journals, step3("JournalId") === journals("JournalId"), "left")
|
||||
val step3 = step2
|
||||
.join(journals, step2("JournalId") === journals("JournalId"), "left")
|
||||
.select(
|
||||
step3("*"),
|
||||
step2("*"),
|
||||
journals("journalName"),
|
||||
journals("journalIssn"),
|
||||
journals("journalPublisher"),
|
||||
journals("journalWebpage")
|
||||
journals("journalPublisher")
|
||||
)
|
||||
.cache
|
||||
step4.count()
|
||||
step3.count()
|
||||
|
||||
val paper_urls = MagUtility
|
||||
.loadMagEntity(spark, "PaperUrls", magBasePath)
|
||||
|
@ -166,12 +162,11 @@ class SparkCreateMagDenormalizedTable(propertyPath: String, args: Array[String],
|
|||
|
||||
paper_urls.count
|
||||
|
||||
step4
|
||||
.join(paper_urls, step4("PaperId") === paper_urls("PaperId"))
|
||||
.select(step4("*"), paper_urls("urls"))
|
||||
step3
|
||||
.join(paper_urls, step3("PaperId") === paper_urls("PaperId"))
|
||||
.select(step3("*"), paper_urls("urls"))
|
||||
.select(
|
||||
$"PaperId".as("paperId"),
|
||||
$"Rank".as("rank"),
|
||||
$"Doi".as("doi"),
|
||||
$"DocType".as("docType"),
|
||||
$"PaperTitle".as("paperTitle"),
|
||||
|
@ -202,11 +197,9 @@ class SparkCreateMagDenormalizedTable(propertyPath: String, args: Array[String],
|
|||
$"conferenceLocation".as("conferenceLocation"),
|
||||
$"conferenceStartDate".as("conferenceStartDate"),
|
||||
$"conferenceEndDate".as("conferenceEndDate"),
|
||||
$"FoS".as("fos"),
|
||||
$"journalName".as("journalName"),
|
||||
$"journalIssn".as("journalIssn"),
|
||||
$"journalPublisher".as("journalPublisher"),
|
||||
$"journalWebpage".as("journalWebpage"),
|
||||
$"urls"
|
||||
)
|
||||
.write
|
||||
|
|
|
@ -0,0 +1,849 @@
|
|||
{
|
||||
"URL": "http://dx.doi.org/10.1016/j.joca.2019.11.002",
|
||||
"resource": {
|
||||
"primary": {
|
||||
"URL": "https://linkinghub.elsevier.com/retrieve/pii/S106345841931266X"
|
||||
}
|
||||
},
|
||||
"member": "78",
|
||||
"score": 0.0,
|
||||
"created": {
|
||||
"date-parts": [
|
||||
[
|
||||
2019,
|
||||
11,
|
||||
14
|
||||
]
|
||||
],
|
||||
"date-time": "2019-11-14T18:08:38Z",
|
||||
"timestamp": 1573754918000
|
||||
},
|
||||
"update-policy": "http://dx.doi.org/10.1016/elsevier_cm_policy",
|
||||
"license": [
|
||||
{
|
||||
"start": {
|
||||
"date-parts": [
|
||||
[
|
||||
2020,
|
||||
5,
|
||||
1
|
||||
]
|
||||
],
|
||||
"date-time": "2020-05-01T00:00:00Z",
|
||||
"timestamp": 1588291200000
|
||||
},
|
||||
"content-version": "tdm",
|
||||
"delay-in-days": 0,
|
||||
"URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
|
||||
},
|
||||
{
|
||||
"start": {
|
||||
"date-parts": [
|
||||
[
|
||||
2021,
|
||||
5,
|
||||
1
|
||||
]
|
||||
],
|
||||
"date-time": "2021-05-01T00:00:00Z",
|
||||
"timestamp": 1619827200000
|
||||
},
|
||||
"content-version": "vor",
|
||||
"delay-in-days": 365,
|
||||
"URL": "http://www.elsevier.com/open-access/userlicense/1.0/"
|
||||
}
|
||||
],
|
||||
"ISSN": [
|
||||
"1063-4584"
|
||||
],
|
||||
"container-title": [
|
||||
"Osteoarthritis and Cartilage"
|
||||
],
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
2020,
|
||||
5
|
||||
]
|
||||
]
|
||||
},
|
||||
"issue": "5",
|
||||
"prefix": "10.1016",
|
||||
"reference-count": 50,
|
||||
"indexed": {
|
||||
"date-parts": [
|
||||
[
|
||||
2024,
|
||||
2,
|
||||
27
|
||||
]
|
||||
],
|
||||
"date-time": "2024-02-27T00:38:44Z",
|
||||
"timestamp": 1708994324729
|
||||
},
|
||||
"author": [
|
||||
{
|
||||
"given": "N.",
|
||||
"family": "Sharma",
|
||||
"sequence": "first",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "P.",
|
||||
"family": "Drobinski",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "A.",
|
||||
"family": "Kayed",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "Z.",
|
||||
"family": "Chen",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "C.F.",
|
||||
"family": "Kjelgaard-Petersen",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "T.",
|
||||
"family": "Gantzel",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "M.A.",
|
||||
"family": "Karsdal",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "M.",
|
||||
"family": "Michaelis",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "C.",
|
||||
"family": "Ladel",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "A.C.",
|
||||
"family": "Bay-Jensen",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "S.",
|
||||
"family": "Lindemann",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "C.S.",
|
||||
"family": "Thudium",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
}
|
||||
],
|
||||
"DOI": "10.1016/j.joca.2019.11.002",
|
||||
"is-referenced-by-count": 27,
|
||||
"funder": [
|
||||
{
|
||||
"DOI": "10.13039/501100001732",
|
||||
"name": "Danmarks Grundforskningsfond",
|
||||
"doi-asserted-by": "publisher"
|
||||
}
|
||||
],
|
||||
"published": {
|
||||
"date-parts": [
|
||||
[
|
||||
2020,
|
||||
5
|
||||
]
|
||||
]
|
||||
},
|
||||
"published-print": {
|
||||
"date-parts": [
|
||||
[
|
||||
2020,
|
||||
5
|
||||
]
|
||||
]
|
||||
},
|
||||
"alternative-id": [
|
||||
"S106345841931266X"
|
||||
],
|
||||
"subject": [
|
||||
"Orthopedics and Sports Medicine",
|
||||
"Biomedical Engineering",
|
||||
"Rheumatology"
|
||||
],
|
||||
"content-domain": {
|
||||
"domain": [
|
||||
"clinicalkey.fr",
|
||||
"clinicalkey.jp",
|
||||
"clinicalkey.es",
|
||||
"clinicalkey.com.au",
|
||||
"oarsijournal.com",
|
||||
"clinicalkey.com",
|
||||
"elsevier.com",
|
||||
"sciencedirect.com"
|
||||
],
|
||||
"crossmark-restriction": true
|
||||
},
|
||||
"reference": [
|
||||
{
|
||||
"key": "10.1016/j.joca.2019.11.002_bib1",
|
||||
"series-title": "Priority Medicines for Europe and the World. 2013 Update",
|
||||
"author": "Kaplan",
|
||||
"year": "2013"
|
||||
},
|
||||
{
|
||||
"issue": "August 2016",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib2",
|
||||
"article-title": "Osteoarthritis: toward a comprehensive understanding of pathological mechanism",
|
||||
"volume": "5",
|
||||
"author": "Chen",
|
||||
"year": "2017",
|
||||
"journal-title": "Bone Res"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "77",
|
||||
"issue": "2",
|
||||
"author": "Sokolove",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib3",
|
||||
"DOI": "10.1177/1759720X12467868",
|
||||
"article-title": "Role of inflammation in the pathogenesis of osteoarthritis: latest findings and interpretations",
|
||||
"year": "2013",
|
||||
"volume": "5",
|
||||
"journal-title": "Ther Adv Musculoskelet Dis"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"issue": "5",
|
||||
"author": "Alberton",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib4",
|
||||
"DOI": "10.3390/ijms20051008",
|
||||
"article-title": "Aggrecan hypomorphism compromises articular cartilage biomechanical properties and is associated with increased incidence of spontaneous osteoarthritis",
|
||||
"year": "2019",
|
||||
"volume": "20",
|
||||
"journal-title": "Int J Mol Sci"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"issue": "6",
|
||||
"author": "Miller",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib5",
|
||||
"DOI": "10.1172/jci.insight.95704",
|
||||
"article-title": "An aggrecan fragment drives osteoarthritis pain through Toll-like receptor 2",
|
||||
"year": "2018",
|
||||
"volume": "3",
|
||||
"journal-title": "JCI Insight"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "1240",
|
||||
"issue": "5",
|
||||
"author": "Lees",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib6",
|
||||
"DOI": "10.1002/art.39063",
|
||||
"article-title": "Bioactivity in an aggrecan 32-mer fragment is mediated via toll-like receptor 2",
|
||||
"year": "2015",
|
||||
"volume": "67",
|
||||
"journal-title": "Arthritis Rheum"
|
||||
},
|
||||
{
|
||||
"key": "10.1016/j.joca.2019.11.002_bib7",
|
||||
"article-title": "Increased function of pronociceptive TRPV1 at the level of the joint in a rat model of osteoarthritis pain",
|
||||
"author": "Kelly",
|
||||
"year": "2013",
|
||||
"journal-title": "Ann Rheum Dis"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "580",
|
||||
"issue": "10",
|
||||
"author": "Robinson",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib8",
|
||||
"DOI": "10.1038/nrrheum.2016.136",
|
||||
"article-title": "Low-grade inflammation as a key mediator of the pathogenesis of osteoarthritis",
|
||||
"year": "2016",
|
||||
"volume": "12",
|
||||
"journal-title": "Nat Rev Rheumatol"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "625",
|
||||
"issue": "11",
|
||||
"author": "Sellam",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib9",
|
||||
"DOI": "10.1038/nrrheum.2010.159",
|
||||
"article-title": "The role of synovitis in pathophysiology and clinical symptoms of osteoarthritis",
|
||||
"year": "2010",
|
||||
"volume": "6",
|
||||
"journal-title": "Nat Rev Rheumatol"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "249",
|
||||
"issue": "2",
|
||||
"author": "Scanzello",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib10",
|
||||
"DOI": "10.1016/j.bone.2012.02.012",
|
||||
"article-title": "The role of synovitis in osteoarthritis pathogenesis",
|
||||
"year": "2012",
|
||||
"volume": "51",
|
||||
"journal-title": "Bone"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "33",
|
||||
"issue": "1",
|
||||
"author": "Kapoor",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib11",
|
||||
"DOI": "10.1038/nrrheum.2010.196",
|
||||
"article-title": "Role of proinflammatory cytokines in the pathophysiology of osteoarthritis",
|
||||
"year": "2011",
|
||||
"volume": "7",
|
||||
"journal-title": "Nat Rev Rheumatol"
|
||||
},
|
||||
{
|
||||
"issue": "4",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib12",
|
||||
"first-page": "1",
|
||||
"volume": "18",
|
||||
"author": "Roh",
|
||||
"year": "2018",
|
||||
"journal-title": "Origin and List of Damps"
|
||||
},
|
||||
{
|
||||
"key": "10.1016/j.joca.2019.11.002_bib13",
|
||||
"doi-asserted-by": "crossref",
|
||||
"article-title": "Overexpression of toll-like receptors 3 and 4 in synovial tissue from patients with early rheumatoid arthritis: toll-like receptor expression in early and longstanding arthritis",
|
||||
"author": "Ospelt",
|
||||
"year": "2008",
|
||||
"journal-title": "Arthritis Rheum",
|
||||
"DOI": "10.1002/art.24140"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "338",
|
||||
"issue": "2",
|
||||
"author": "Roelofs",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib14",
|
||||
"DOI": "10.1002/art.23217",
|
||||
"article-title": "The orchestra of toll-like receptors and their potential role in frequently occurring rheumatic conditions",
|
||||
"year": "2008",
|
||||
"volume": "58",
|
||||
"journal-title": "Arthritis Rheum"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "657",
|
||||
"issue": "3",
|
||||
"author": "Gondokaryono",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib15",
|
||||
"DOI": "10.1189/jlb.1206730",
|
||||
"article-title": "The extra domain A of fibronectin stimulates murine mast cells via Toll-like receptor 4",
|
||||
"year": "2007",
|
||||
"volume": "82",
|
||||
"journal-title": "J Leukoc Biol"
|
||||
},
|
||||
{
|
||||
"issue": "7",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib16",
|
||||
"first-page": "2004",
|
||||
"article-title": "Chondrocyte innate immune myeloid differentiation factor 88-dependent signaling drives procatabolic effects of the endogenous toll-like receptor 2/toll-like receptor 4 ligands low molecular weight hyaluronan and high mobility group box chromosomal protein",
|
||||
"volume": "62",
|
||||
"author": "Liu-Bryan",
|
||||
"year": "2010",
|
||||
"journal-title": "Arthritis Rheum"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "774",
|
||||
"issue": "7",
|
||||
"author": "Midwood",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib17",
|
||||
"DOI": "10.1038/nm.1987",
|
||||
"article-title": "Tenascin-C is an endogenous activator of Toll-like receptor 4 that is essential for maintaining inflammation in arthritic joint disease",
|
||||
"year": "2009",
|
||||
"volume": "15",
|
||||
"journal-title": "Nat Med"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "16",
|
||||
"author": "Berenbaum",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib18",
|
||||
"DOI": "10.1016/j.joca.2012.11.012",
|
||||
"article-title": "Osteoarthritis as an inflammatory disease (osteoarthritis is not osteoarthrosis!)",
|
||||
"year": "2013",
|
||||
"volume": "21",
|
||||
"journal-title": "Osteoarthr Cartil"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "259",
|
||||
"issue": "4",
|
||||
"author": "Wang",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib19",
|
||||
"DOI": "10.1186/1756-0500-2-259",
|
||||
"article-title": "Suppression of MMP activity in bovine cartilage explants cultures has little if any effect on the release of aggrecanase-derived aggrecan fragments",
|
||||
"year": "2009",
|
||||
"volume": "2",
|
||||
"journal-title": "BMC Res Notes"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "423",
|
||||
"issue": "5–6",
|
||||
"author": "Bay-Jensen",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib20",
|
||||
"DOI": "10.1016/j.clinbiochem.2011.01.001",
|
||||
"article-title": "Enzyme-linked immunosorbent assay (ELISAs) for metalloproteinase derived type II collagen neoepitope, CIIM-Increased serum CIIM in subjects with severe radiographic osteoarthritis",
|
||||
"year": "2011",
|
||||
"volume": "44",
|
||||
"journal-title": "Clin Biochem"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "18789",
|
||||
"issue": "10",
|
||||
"author": "Gudmann",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib21",
|
||||
"DOI": "10.3390/ijms151018789",
|
||||
"article-title": "Cartilage turnover reflected by metabolic processing of type II collagen: a novel marker of anabolic function in chondrocytes",
|
||||
"year": "2014",
|
||||
"volume": "15",
|
||||
"journal-title": "Int J Mol Sci"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "899",
|
||||
"issue": "10–11",
|
||||
"author": "Barascuk",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib22",
|
||||
"DOI": "10.1016/j.clinbiochem.2010.03.012",
|
||||
"article-title": "A novel assay for extracellular matrix remodeling associated with liver fibrosis: an enzyme-linked immunosorbent assay (ELISA) for a MMP-9 proteolytically revealed neo-epitope of type III collagen",
|
||||
"year": "2010",
|
||||
"volume": "43",
|
||||
"journal-title": "Clin Biochem"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "93",
|
||||
"author": "Sun",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib23",
|
||||
"DOI": "10.1186/1471-2474-15-93",
|
||||
"article-title": "The active form of MMP-3 is a marker of synovial inflammation and cartilage turnover in inflammatory joint diseases",
|
||||
"year": "2014",
|
||||
"volume": "15",
|
||||
"journal-title": "BMC Muscoskelet Disord"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "103",
|
||||
"issue": "1",
|
||||
"author": "Chandrasekhar",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib24",
|
||||
"DOI": "10.1016/0003-2697(87)90658-0",
|
||||
"article-title": "Microdetermination of proteoglycans and glycosaminoglycans in the presence of guanidine hydrochloride",
|
||||
"year": "1987",
|
||||
"volume": "161",
|
||||
"journal-title": "Anal Biochem"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "8722",
|
||||
"issue": "22",
|
||||
"author": "Hankins",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib25",
|
||||
"DOI": "10.1073/pnas.1201313109",
|
||||
"article-title": "Amino acid addition to Vibrio cholerae LPS establishes a link between surface remodeling in Gram-positive and Gram-negative bacteria",
|
||||
"year": "2012",
|
||||
"volume": "109",
|
||||
"journal-title": "Proc Natl Acad Sci"
|
||||
},
|
||||
{
|
||||
"issue": "c",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib26",
|
||||
"first-page": "1",
|
||||
"volume": "2",
|
||||
"author": "Bailie",
|
||||
"year": "2010",
|
||||
"journal-title": "Supporting Information"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "1145",
|
||||
"issue": "9",
|
||||
"author": "Neogi",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib27",
|
||||
"DOI": "10.1016/j.joca.2013.03.018",
|
||||
"article-title": "The epidemiology and impact of pain in osteoarthritis",
|
||||
"year": "2013",
|
||||
"volume": "21",
|
||||
"journal-title": "Osteoarthr Cartil"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "623",
|
||||
"issue": "3",
|
||||
"author": "Hunter",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib28",
|
||||
"DOI": "10.1016/j.rdc.2008.05.004",
|
||||
"article-title": "The symptoms of osteoarthritis and the genesis of pain",
|
||||
"year": "2008",
|
||||
"volume": "34",
|
||||
"journal-title": "Rheum Dis Clin N Am"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "1326",
|
||||
"issue": "11",
|
||||
"author": "Xu",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib29",
|
||||
"DOI": "10.1038/nm.3978",
|
||||
"article-title": "Inhibition of mechanical allodynia in neuropathic pain by TLR5-mediated A-fiber blockade",
|
||||
"year": "2015",
|
||||
"volume": "21",
|
||||
"journal-title": "Nat Med"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "145",
|
||||
"issue": "October 2017",
|
||||
"author": "Lacagnina",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib30",
|
||||
"DOI": "10.1016/j.pharmthera.2017.10.006",
|
||||
"article-title": "Toll-like receptors and their role in persistent pain",
|
||||
"year": "2018",
|
||||
"volume": "184",
|
||||
"journal-title": "Pharmacol Ther"
|
||||
},
|
||||
{
|
||||
"key": "10.1016/j.joca.2019.11.002_bib31",
|
||||
"first-page": "2016",
|
||||
"article-title": "Blockade of toll-like receptors (TLR2, TLR4) attenuates pain and potentiates buprenorphine analgesia in a rat neuropathic pain model",
|
||||
"author": "Jurga",
|
||||
"year": "2016",
|
||||
"journal-title": "Neural Plast"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "357",
|
||||
"issue": "5",
|
||||
"author": "Huang",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib32",
|
||||
"DOI": "10.1007/s11926-009-0051-z",
|
||||
"article-title": "The role of toll-like receptors in rheumatoid arthritis",
|
||||
"year": "2009",
|
||||
"volume": "11",
|
||||
"journal-title": "Curr Rheumatol Rep"
|
||||
},
|
||||
{
|
||||
"issue": "3",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib33",
|
||||
"first-page": "1",
|
||||
"article-title": "TLR4 signalling in osteoarthritis-finding targets for candidate DMOADs",
|
||||
"volume": "11",
|
||||
"author": "Gómez",
|
||||
"year": "2014",
|
||||
"journal-title": "Nat Rev Rheumatol"
|
||||
},
|
||||
{
|
||||
"issue": "5 Pt 1",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib34",
|
||||
"first-page": "1432",
|
||||
"article-title": "Procollagen types I and III aminoterminal propeptide levels during acute respiratory distress syndrome and in response to methylprednisolone treatment.[Erratum appears in Am J Respir Crit Care Med. 2013 Dec 15;188(12):1477]",
|
||||
"volume": "158",
|
||||
"author": "Meduri",
|
||||
"year": "1998",
|
||||
"journal-title": "Am J Respir Crit Care Med"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "5",
|
||||
"issue": "1",
|
||||
"author": "Scheja",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib35",
|
||||
"DOI": "10.3109/03009749209095054",
|
||||
"article-title": "Serum levels of aminoterminal type III procollagen peptide and hyaluronan predict mortality in systemic sclerosis",
|
||||
"year": "1992",
|
||||
"volume": "21",
|
||||
"journal-title": "Scand J Rheumatol"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "547",
|
||||
"issue": "8",
|
||||
"author": "Kjelgaard-Petersen",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib36",
|
||||
"DOI": "10.3109/1354750X.2015.1105497",
|
||||
"article-title": "Synovitis biomarkers: ex vivo characterization of three biomarkers for identification of inflammatory osteoarthritis",
|
||||
"year": "2015",
|
||||
"volume": "20",
|
||||
"journal-title": "Biomarkers"
|
||||
},
|
||||
{
|
||||
"key": "10.1016/j.joca.2019.11.002_bib37",
|
||||
"article-title": "Sensitization and serological biomarkers in knee osteoarthritis patients with different degrees of synovitis",
|
||||
"author": "Petersen",
|
||||
"year": "2015",
|
||||
"journal-title": "Clin J Pain"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "309",
|
||||
"author": "He",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib38",
|
||||
"DOI": "10.1186/1471-2474-15-309",
|
||||
"article-title": "Type X collagen levels are elevated in serum from human osteoarthritis patients and associated with biomarkers of cartilage degradation and inflammation",
|
||||
"year": "2014",
|
||||
"volume": "15",
|
||||
"journal-title": "BMC Muscoskelet Disord"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "1",
|
||||
"issue": "1",
|
||||
"author": "Bay-Jensen",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib39",
|
||||
"DOI": "10.1371/journal.pone.0054504",
|
||||
"article-title": "Circulating protein fragments of cartilage and connective tissue degradation are diagnostic and prognostic markers of rheumatoid arthritis and ankylosing spondylitis",
|
||||
"year": "2013",
|
||||
"volume": "8",
|
||||
"journal-title": "PLoS One"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "44",
|
||||
"issue": "1",
|
||||
"author": "Siebuhr",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib40",
|
||||
"DOI": "10.1016/j.joca.2013.10.020",
|
||||
"article-title": "Identification and characterisation of osteoarthritis patients with inflammation derived tissue turnover",
|
||||
"year": "2014",
|
||||
"volume": "22",
|
||||
"journal-title": "Osteoarthr Cartil"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "221",
|
||||
"issue": "3",
|
||||
"author": "Yuji Yoshida",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib41",
|
||||
"DOI": "10.1016/0306-9877(88)90147-8",
|
||||
"article-title": "Interleukin 6 and rheumatoid arthritis",
|
||||
"year": "1988",
|
||||
"volume": "27",
|
||||
"journal-title": "Med Hypotheses"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "5173",
|
||||
"issue": "22",
|
||||
"author": "Suthaus",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib42",
|
||||
"DOI": "10.1182/blood-2011-09-377705",
|
||||
"article-title": "HHV8 encoded viral IL-6 collaborates with mouse IL-6 in MCD-like development in mice",
|
||||
"year": "2012",
|
||||
"volume": "119",
|
||||
"journal-title": "Blood"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "1",
|
||||
"issue": "1",
|
||||
"author": "Zhou",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib43",
|
||||
"DOI": "10.1186/s12974-016-0607-6",
|
||||
"article-title": "Interleukin-6: an emerging regulator of pathological pain",
|
||||
"year": "2016",
|
||||
"volume": "13",
|
||||
"journal-title": "J Neuroinflammation"
|
||||
},
|
||||
{
|
||||
"key": "10.1016/j.joca.2019.11.002_bib44",
|
||||
"series-title": "Local Translation and Retrograde Axonal Transport of Creb Regulates Il-6-Induced Nociceptive Plasticity",
|
||||
"first-page": "1",
|
||||
"author": "Melemedjian",
|
||||
"year": "2014"
|
||||
},
|
||||
{
|
||||
"issue": "6",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib45",
|
||||
"first-page": "1149",
|
||||
"article-title": "Aggrecanase and Aggrecan degradation in osteoarthritis: a review",
|
||||
"volume": "36",
|
||||
"author": "Huang",
|
||||
"year": "2008",
|
||||
"journal-title": "J Int Med Res"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "133",
|
||||
"issue": "1",
|
||||
"author": "Troeberg",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib46",
|
||||
"DOI": "10.1016/j.bbapap.2011.06.020",
|
||||
"article-title": "Proteases involved in cartilage matrix degradation in osteoarthritis",
|
||||
"year": "2012",
|
||||
"volume": "1824",
|
||||
"journal-title": "Biochim Biophys Acta Protein Proteonomics"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "1633",
|
||||
"issue": "11",
|
||||
"author": "Zhang",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib47",
|
||||
"DOI": "10.1136/ard.2007.079574",
|
||||
"article-title": "Differential Toll-like receptor-dependent collagenase expression in chondrocytes",
|
||||
"year": "2008",
|
||||
"volume": "67",
|
||||
"journal-title": "Ann Rheum Dis"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "2152",
|
||||
"issue": "7",
|
||||
"author": "Kim",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib48",
|
||||
"DOI": "10.1002/art.21951",
|
||||
"article-title": "The catabolic pathway mediated by toll-like receptors in human osteoarthritic chondrocytes",
|
||||
"year": "2006",
|
||||
"volume": "54",
|
||||
"journal-title": "Arthritis Rheum"
|
||||
},
|
||||
{
|
||||
"issue": "2018",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib49",
|
||||
"article-title": "The anti-ADAMTS-5 nanobody®, M6495, protects against cartilage breakdown in cartilage and synovial joint tissue explant models",
|
||||
"volume": "26",
|
||||
"author": "Siebuhr",
|
||||
"year": "2018",
|
||||
"journal-title": "Osteoarthr Cartil"
|
||||
},
|
||||
{
|
||||
"doi-asserted-by": "crossref",
|
||||
"first-page": "2933",
|
||||
"issue": "11",
|
||||
"author": "Miller",
|
||||
"key": "10.1016/j.joca.2019.11.002_bib50",
|
||||
"DOI": "10.1002/art.39291",
|
||||
"article-title": "Damage-associated molecular patterns generated in osteoarthritis directly excite murine nociceptive neurons through toll-like receptor 4",
|
||||
"year": "2015",
|
||||
"volume": "67",
|
||||
"journal-title": "Arthritis Rheum"
|
||||
}
|
||||
],
|
||||
"title": [
|
||||
"Inflammation and joint destruction may be linked to the generation of cartilage metabolites of ADAMTS-5 through activation of toll-like receptors"
|
||||
],
|
||||
"link": [
|
||||
{
|
||||
"URL": "https://api.elsevier.com/content/article/PII:S106345841931266X?httpAccept=text/xml",
|
||||
"content-type": "text/xml",
|
||||
"content-version": "vor",
|
||||
"intended-application": "text-mining"
|
||||
},
|
||||
{
|
||||
"URL": "https://api.elsevier.com/content/article/PII:S106345841931266X?httpAccept=text/plain",
|
||||
"content-type": "text/plain",
|
||||
"content-version": "vor",
|
||||
"intended-application": "text-mining"
|
||||
}
|
||||
],
|
||||
"source": "Crossref",
|
||||
"type": "journal-article",
|
||||
"publisher": "Elsevier BV",
|
||||
"journal-issue": {
|
||||
"issue": "5",
|
||||
"published-print": {
|
||||
"date-parts": [
|
||||
[
|
||||
2020,
|
||||
5
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"volume": "28",
|
||||
"references-count": 50,
|
||||
"issn-type": [
|
||||
|
||||
{
|
||||
"value": "2227-9717",
|
||||
"type": "electronic"
|
||||
},
|
||||
{
|
||||
"value": "VALUE",
|
||||
"type": "PIPPO"
|
||||
},
|
||||
{
|
||||
"value": "1063-4584",
|
||||
"type": "pu"
|
||||
}
|
||||
],
|
||||
"assertion": [
|
||||
{
|
||||
"value": "Elsevier",
|
||||
"name": "publisher",
|
||||
"label": "This article is maintained by"
|
||||
},
|
||||
{
|
||||
"value": "Inflammation and joint destruction may be linked to the generation of cartilage metabolites of ADAMTS-5 through activation of toll-like receptors",
|
||||
"name": "articletitle",
|
||||
"label": "Article Title"
|
||||
},
|
||||
{
|
||||
"value": "Osteoarthritis and Cartilage",
|
||||
"name": "journaltitle",
|
||||
"label": "Journal Title"
|
||||
},
|
||||
{
|
||||
"value": "https://doi.org/10.1016/j.joca.2019.11.002",
|
||||
"name": "articlelink",
|
||||
"label": "CrossRef DOI link to publisher maintained version"
|
||||
},
|
||||
{
|
||||
"value": "article",
|
||||
"name": "content_type",
|
||||
"label": "Content Type"
|
||||
},
|
||||
{
|
||||
"value": "© 2019 Osteoarthritis Research Society International. Published by Elsevier Ltd.",
|
||||
"name": "copyright",
|
||||
"label": "Copyright"
|
||||
}
|
||||
],
|
||||
"deposited": {
|
||||
"date-parts": [
|
||||
[
|
||||
2022,
|
||||
7,
|
||||
9
|
||||
]
|
||||
],
|
||||
"date-time": "2022-07-09T14:08:02Z",
|
||||
"timestamp": 1657375682000
|
||||
},
|
||||
"language": "en",
|
||||
"page": "658-668",
|
||||
"short-container-title": [
|
||||
"Osteoarthritis and Cartilage"
|
||||
]
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,41 @@
|
|||
package eu.dnetlib.dhp.collection.crossref
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||
import org.apache.spark.sql.SparkSession
|
||||
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
import org.mockito.junit.jupiter.MockitoExtension
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.io.Source
|
||||
|
||||
/** Exercises the Crossref dump to OAF transformation end to end on a local Spark session.
  *
  * `testMapping` reads a Crossref dump from a fixed location on the local file system, so it
  * is effectively a manual integration test: it is skipped (not failed) when that dump is
  * not present on the machine running the suite.
  */
@ExtendWith(Array(classOf[MockitoExtension]))
class CrossrefMappingTest extends AbstractVocabularyTest {

  val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
  val mapper = new ObjectMapper()

  /** Delegates to the inherited `setUpVocabulary()` before every test. */
  @BeforeEach
  def setUp(): Unit = {
    super.setUpVocabulary()
  }

  /** Runs `transformCrossref` on the local dump and prints how many rows were written. */
  @Test
  def testMapping(): Unit = {
    import java.nio.file.{Files, Paths}
    import org.junit.jupiter.api.Assumptions.assumeTrue

    // Machine-specific locations: kept as in the original test, but named once so the
    // read-back below cannot drift from the path the transformation wrote to.
    val sourcePath = "/home/sandro/Downloads/crossref"
    val targetPath = "/home/sandro/Downloads/crossref_transformed"

    // Without the local dump there is nothing to transform; report the test as skipped
    // instead of letting it fail on CI or on other developers' machines.
    assumeTrue(
      Files.exists(Paths.get(sourcePath)),
      s"Crossref dump not found at $sourcePath, skipping testMapping"
    )

    val spark = SparkSession.builder().master("local[*]").appName("TransformCrossref").getOrCreate()

    try {
      // NOTE(review): the three constructor arguments are passed as null; presumably
      // transformCrossref does not read them — confirm against SparkMapDumpIntoOAF.
      val s = new SparkMapDumpIntoOAF(null, null, null)
      import spark.implicits._

      s.transformCrossref(
        spark,
        sourcePath = sourcePath,
        targetPath = targetPath,
        vocabularies = vocabularies
      )

      print(spark.read.text(targetPath).count)
    } finally {
      // Release the local Spark context even when the transformation throws.
      spark.stop()
    }
  }

}
|
|
@ -1,4 +1,4 @@
|
|||
package eu.dnetlib.doiboost.crossref
|
||||
package eu.dnetlib.doiboost.crossref
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
|
@ -48,16 +48,14 @@ case object Crossref2Oaf {
|
|||
json.extract[List[funderInfo]]
|
||||
}
|
||||
|
||||
def getIrishId(doi: String): Option[String] = {
|
||||
def getIrishId(doi: String): Option[String] = {
|
||||
val id = doi.split("/").last
|
||||
irishFunder
|
||||
.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
|
||||
.map(f => f.id)
|
||||
}
|
||||
|
||||
|
||||
|
||||
def mappingResult(result: Result, json: JValue, instanceType:Qualifier, originalType: String): Result = {
|
||||
def mappingResult(result: Result, json: JValue, instanceType: Qualifier, originalType: String): Result = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
|
||||
//MAPPING Crossref DOI into PID
|
||||
|
@ -298,22 +296,22 @@ case object Crossref2Oaf {
|
|||
}
|
||||
|
||||
/** *
|
||||
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
|
||||
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
|
||||
* to generate one of the following main entities:
|
||||
* - publication
|
||||
* - dataset
|
||||
* - software
|
||||
* - otherresearchproduct
|
||||
*
|
||||
* @param resourceType
|
||||
* @param vocabularies
|
||||
* @return
|
||||
*/
|
||||
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
|
||||
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
|
||||
* to generate one of the following main entities:
|
||||
* - publication
|
||||
* - dataset
|
||||
* - software
|
||||
* - otherresearchproduct
|
||||
*
|
||||
* @param resourceType
|
||||
* @param vocabularies
|
||||
* @return
|
||||
*/
|
||||
def getTypeQualifier(
|
||||
resourceType: String,
|
||||
vocabularies: VocabularyGroup
|
||||
): (Qualifier, Qualifier, String) = {
|
||||
resourceType: String,
|
||||
vocabularies: VocabularyGroup
|
||||
): (Qualifier, Qualifier, String) = {
|
||||
if (resourceType != null && resourceType.nonEmpty) {
|
||||
val typeQualifier =
|
||||
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
|
||||
|
@ -340,7 +338,7 @@ case object Crossref2Oaf {
|
|||
val objectSubType = (json \ "subtype").extractOrElse[String](null)
|
||||
if (objectType == null)
|
||||
return resultList
|
||||
val typology =getTypeQualifier(objectType, vocabularies)
|
||||
val typology = getTypeQualifier(objectType, vocabularies)
|
||||
|
||||
if (typology == null)
|
||||
return List()
|
||||
|
@ -349,7 +347,6 @@ case object Crossref2Oaf {
|
|||
if (result == null)
|
||||
return List()
|
||||
|
||||
|
||||
mappingResult(result, json, typology._1, typology._3)
|
||||
if (result == null || result.getId == null)
|
||||
return List()
|
||||
|
@ -693,12 +690,11 @@ case object Crossref2Oaf {
|
|||
val item = new Dataset
|
||||
item.setResourcetype(objectType)
|
||||
return item
|
||||
}
|
||||
else if (objectType.getClassid.equalsIgnoreCase("software")){
|
||||
} else if (objectType.getClassid.equalsIgnoreCase("software")) {
|
||||
val item = new Software
|
||||
item.setResourcetype(objectType)
|
||||
return item
|
||||
}else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")){
|
||||
} else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")) {
|
||||
val item = new OtherResearchProduct
|
||||
item.setResourcetype(objectType)
|
||||
return item
|
||||
|
|
|
@ -48,7 +48,6 @@ object SparkMapDumpIntoOAF {
|
|||
val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)
|
||||
require(vocabularies != null)
|
||||
|
||||
|
||||
spark.read
|
||||
.load(parser.get("sourcePath"))
|
||||
.as[CrossrefDT]
|
||||
|
|
|
@ -17,35 +17,34 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|||
|
||||
public abstract class AbstractVocabularyTest {
|
||||
|
||||
@Mock
|
||||
protected ISLookUpService isLookUpService;
|
||||
@Mock
|
||||
protected ISLookUpService isLookUpService;
|
||||
|
||||
protected VocabularyGroup vocabularies;
|
||||
protected VocabularyGroup vocabularies;
|
||||
|
||||
public void setUpVocabulary() throws ISLookUpException, IOException {
|
||||
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||
public void setUpVocabulary() throws ISLookUpException, IOException {
|
||||
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||
|
||||
lenient()
|
||||
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||
.thenReturn(synonyms());
|
||||
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||
}
|
||||
lenient()
|
||||
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||
.thenReturn(synonyms());
|
||||
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||
}
|
||||
|
||||
private static List<String> vocs() throws IOException {
|
||||
return IOUtils
|
||||
.readLines(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/terms.txt")));
|
||||
}
|
||||
|
||||
private static List<String> synonyms() throws IOException {
|
||||
return IOUtils
|
||||
.readLines(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/synonyms.txt")));
|
||||
}
|
||||
private static List<String> vocs() throws IOException {
|
||||
return IOUtils
|
||||
.readLines(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/terms.txt")));
|
||||
}
|
||||
|
||||
private static List<String> synonyms() throws IOException {
|
||||
return IOUtils
|
||||
.readLines(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/synonyms.txt")));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import scala.io.Source
|
|||
import scala.util.matching.Regex
|
||||
|
||||
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||
class CrossrefMappingTest extends AbstractVocabularyTest{
|
||||
class CrossrefMappingTest extends AbstractVocabularyTest {
|
||||
|
||||
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
||||
val mapper = new ObjectMapper()
|
||||
|
@ -34,10 +34,15 @@ class CrossrefMappingTest extends AbstractVocabularyTest{
|
|||
}
|
||||
|
||||
@Test
|
||||
def testMissingAuthorParser():Unit = {
|
||||
val json: String = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")).mkString
|
||||
def testMissingAuthorParser(): Unit = {
|
||||
val json: String = Source
|
||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json"))
|
||||
.mkString
|
||||
val result = Crossref2Oaf.convert(json, vocabularies)
|
||||
result.filter(o => o.isInstanceOf[Publication]).map(p=> p.asInstanceOf[Publication]).foreach(p =>assertTrue(p.getAuthor.size()>0))
|
||||
result
|
||||
.filter(o => o.isInstanceOf[Publication])
|
||||
.map(p => p.asInstanceOf[Publication])
|
||||
.foreach(p => assertTrue(p.getAuthor.size() > 0))
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -127,7 +132,6 @@ class CrossrefMappingTest extends AbstractVocabularyTest{
|
|||
|
||||
val items = resultList.filter(p => p.isInstanceOf[Result])
|
||||
|
||||
|
||||
items.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
|
||||
|
||||
}
|
||||
|
@ -169,7 +173,6 @@ class CrossrefMappingTest extends AbstractVocabularyTest{
|
|||
|
||||
assertEquals(doisReference.size, relationList.size)
|
||||
|
||||
|
||||
relationList.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
|
||||
}
|
||||
|
||||
|
@ -190,7 +193,6 @@ class CrossrefMappingTest extends AbstractVocabularyTest{
|
|||
|
||||
val items = resultList.filter(p => p.isInstanceOf[Result])
|
||||
|
||||
|
||||
items.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
|
||||
|
||||
}
|
||||
|
@ -646,7 +648,6 @@ class CrossrefMappingTest extends AbstractVocabularyTest{
|
|||
|
||||
val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result]
|
||||
|
||||
|
||||
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
|
||||
|
||||
assertTrue(
|
||||
|
|
Loading…
Reference in New Issue