forked from D-Net/dnet-hadoop
merged stable ids
This commit is contained in:
commit
bbe8193930
|
@ -131,18 +131,9 @@ public class HttpConnector2 {
|
|||
}
|
||||
return attemptDownload(newUrl, retryNumber + 1, report);
|
||||
}
|
||||
if (is4xx(urlConn.getResponseCode())) {
|
||||
// CLIENT ERROR, DO NOT RETRY
|
||||
report
|
||||
.put(
|
||||
REPORT_PREFIX + urlConn.getResponseCode(),
|
||||
String
|
||||
.format(
|
||||
"%s error: %s", requestUrl, urlConn.getResponseMessage()));
|
||||
throw new CollectorException("4xx error: request will not be repeated. " + report);
|
||||
}
|
||||
if (is5xx(urlConn.getResponseCode())) {
|
||||
if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) {
|
||||
switch (urlConn.getResponseCode()) {
|
||||
case HttpURLConnection.HTTP_NOT_FOUND:
|
||||
case HttpURLConnection.HTTP_BAD_GATEWAY:
|
||||
case HttpURLConnection.HTTP_UNAVAILABLE:
|
||||
case HttpURLConnection.HTTP_GATEWAY_TIMEOUT:
|
||||
|
|
|
@ -21,6 +21,9 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
|||
|
||||
public class OaiCollectorPlugin implements CollectorPlugin {
|
||||
|
||||
public static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}";
|
||||
public static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z";
|
||||
|
||||
private static final String FORMAT_PARAM = "format";
|
||||
private static final String OAI_SET_PARAM = "set";
|
||||
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
|
||||
|
@ -62,11 +65,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
|
|||
throw new CollectorException("Param 'mdFormat' is null or empty");
|
||||
}
|
||||
|
||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||
if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
|
||||
}
|
||||
|
||||
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||
if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
|
||||
}
|
||||
|
||||
|
|
|
@ -107,10 +107,12 @@ public class OaiIterator implements Iterator<String> {
|
|||
if (set != null && !set.isEmpty()) {
|
||||
url += "&set=" + URLEncoder.encode(set, "UTF-8");
|
||||
}
|
||||
if (fromDate != null && fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
|
||||
}
|
||||
if (untilDate != null && untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
|
||||
}
|
||||
log.info("Start harvesting using url: " + url);
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
// From PROD, 2021-07-06: tf (transformation) script for HAL, with around 3 million records
|
||||
declare_script "dc_cleaning_OpenAIREplus_compliant_hal";
|
||||
declare_ns oaf = "http://namespace.openaire.eu/oaf";
|
||||
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
|
||||
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
|
||||
declare_ns dc = "http://purl.org/dc/elements/1.1/";
|
||||
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
|
||||
declare_ns oai = "http://www.openarchives.org/OAI/2.0/";
|
||||
declare_ns xs = "http://www.w3.org/2001/XMLSchema";
|
||||
$var0 = "''";
|
||||
$varFP7 = "'corda_______::'";
|
||||
$varH2020 = "'corda__h2020::'";
|
||||
$varDummy = "''";
|
||||
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
|
||||
static $varRepoid = xpath:"//dri:repositoryId";
|
||||
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
|
||||
dri:objIdentifier = xpath:"//dri:objIdentifier";
|
||||
dri:repositoryId = $varRepoid;
|
||||
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
|
||||
//
|
||||
// communities - deactivated until we receive the green light from DARIAH to mark the community on PROD as well
|
||||
// $varCommunity = xpath:"//*[local-name()='setSpec'][starts-with(., 'collection:DARIAH')]/'dariah'";
|
||||
// the concept should not appear with an empty id attribute, i.e. when there is no community - ugly, but it seems to work (oaf:datasourceprefix is used only because it is a field available in all records)
|
||||
// oaf:concept = set(xpath:"//oaf:datasourceprefix[string-length($varCommunity) gt 0]/''", @id = $varCommunity;);
|
||||
//
|
||||
// apply xpath:"//dc:contributor[starts-with(., 'European Project')]" if xpath:"string-length(replace(., '.*(\d{6,6}).*', '$1')) = 6" oaf:projectid = xpath:"concat($var1, replace(., '.*(\d{6,6}).*', '$1'))"; else $varDummy = "''";
|
||||
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
if xpath:"//dc:title[string-length(.)> 0]" $varDummy = "''"; else dc:coverage = skipRecord();
|
||||
dc:title = xpath:"//dc:title[string-length(.) > 0]/normalize-space(.)";
|
||||
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
dc:contributor = xpath:"//dc:contributor";
|
||||
// dc:description = xpath:"//dc:description/normalize-space(.)";
|
||||
//dc:description = xpath:"string-join(//dc:description/normalize-space(.), concat('; ',codepoints-to-string(10)))";
|
||||
dc:description = xpath:"string-join(//dc:description/normalize-space(.), '; ')";
|
||||
dc:format = xpath:"//dc:format";
|
||||
$varHttpTest = "''";
|
||||
oaf:fulltext = xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))]";
|
||||
//if xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))] or //dc:relation[starts-with(lower-case(normalize-space(.)), 'info:eu-repo/grantagreement')] or //dc:rights[starts-with(lower-case(normalize-space(.)), 'open') or contains(lower-case(normalize-space(.)), 'openaccess')] or //dc:accessRights[contains(lower-case(normalize-space(.)), 'openaccess')]" $var0 = "''"; else dc:coverage = skipRecord();
|
||||
if xpath:"//dc:identifier[starts-with(., 'http')]" $var0 = "''"; else dc:coverage = skipRecord();
|
||||
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
|
||||
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
|
||||
static dr:dateOfTransformation = xpath:"current-dateTime()";
|
||||
dc:type = xpath:"//dc:type";
|
||||
dc:format = xpath:"//dc:format";
|
||||
dc:date = xpath:"//dc:date";
|
||||
dc:language = Convert(xpath:"//dc:language", Languages);
|
||||
$varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
|
||||
if xpath:"starts-with($varDateAccepted, '0')" oaf:dateAccepted = $varDummy; else oaf:dateAccepted = $varDateAccepted;
|
||||
$varEmbargoEnd = xpath:"//dc:date[matches(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', 'i')][contains(lower-case(.), 'info:eu-repo')]/replace(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', '$3', 'i')";
|
||||
oaf:embargoenddate = $varEmbargoEnd;
|
||||
// FP7
|
||||
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
|
||||
// H2020
|
||||
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2012][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
|
||||
// H2020 workaround for HAL
|
||||
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement//)(\d\d\d\d\d\d)(.*)', 'i')][//dc:contributor[contains(lower-case(.), 'h2020')]][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2012][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement//)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
|
||||
dc:relation = xpath:"//dc:relation";
|
||||
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
|
||||
//
|
||||
oaf:collectedDatasourceid = xpath:"$varDatasourceid";
|
||||
//
|
||||
//if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type) | //oai:setSpec", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies);
|
||||
$varCobjCategoryReverse = Convert(xpath:"insert-before(reverse(//dc:type) , 0, reverse(//oai:setSpec))", TextTypologies);
|
||||
$varSuperTypeReverse = Convert(xpath:"normalize-space($varCobjCategoryReverse)", SuperTypes);
|
||||
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_']/$varCobjCategoryReverse", @type = $varSuperTypeReverse;);
|
||||
$varCobjCategoryStraight = Convert(xpath:"insert-before(//dc:type , 100, //oai:setSpec)", TextTypologies);
|
||||
$varSuperTypeStraight = Convert(xpath:"normalize-space($varCobjCategoryStraight)", SuperTypes);
|
||||
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other'))]/$varCobjCategoryStraight", @type = $varSuperTypeStraight;);
|
||||
//
|
||||
// review level
|
||||
// oaf:refereed = Convert(xpath:"//dc:description", ReviewLevels);
|
||||
$varRefereedConvt = Convert(xpath:"(//dc:type, //oai:setSpec, //dc:description)", ReviewLevels);
|
||||
$varRefereedDesct = xpath:"(//dc:description[matches(lower-case(.), '.*(this\s*book|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*')]/'0001', //dc:description[matches(., '^version\s*(préliminaire.*|0$)')]/'0002')";
|
||||
$varRefereedIdntf = xpath:"(//*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)])pre[\.\-_/\s\(\)]?prints?([\.\-_/\s\(\)].*)?$')][count(//dc:identifier) = 1]/'0002', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)])refereed([\.\-_/\s\(\)\d].*)?$')]/'0001', //*[string(node-name(.)) = 'dc:identifier' and contains(lower-case(.), '-peer-reviewed-article-')]/'0001')";
|
||||
$varRefereed = xpath:"($varRefereedConvt, $varRefereedIdntf, $varRefereedDesct)";
|
||||
if xpath:"count(index-of($varRefereed, '0001')) >0" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
|
||||
if xpath:"count(index-of($varRefereed, '0002')) >0 and count(index-of($varRefereed, '0001')) = 0" oaf:refereed = xpath:"'0002'"; else $varDummy= "''";
|
||||
//
|
||||
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') and (xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
|
||||
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') " oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
|
||||
// 2021-06-01, acz: the next line avoids OPEN being used as the default by setting UNKNOWN instead; 2021-07-05, acz
|
||||
//if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())]" $var0 = "''"; else oaf:accessrights = "UNKNOWN";
|
||||
oaf:license = xpath:"//dc:rights[starts-with(., 'http') or matches(., '^CC[- ]BY([- ](NC([- ](ND|SA))?|ND|SA))([- ]\d(\.\d)?)?$', 'i')]";
|
||||
//
|
||||
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
||||
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
||||
//
|
||||
//$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
|
||||
$varIdDoi = identifierExtract('["//dc:identifier[starts-with(., \"info:\") or starts-with(., \"urn:\") or starts-with(., \"doi:\") or starts-with(., \"DOI:\") or starts-with(., \"Doi:\") or starts-with(., \"doi \") or starts-with(., \"DOI \") or starts-with(., \"Doi \") or starts-with(., \"10.\") or ((starts-with(., \"http\")) and contains(., \"doi.org/10.\"))]", "//dc:relation[starts-with(., \"info:eu-repo/semantics/altIdentifier/doi/10.\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/DOI/10.\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/Doi/10.\") or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/doi/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/DOI/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/Doi/http\")) and contains(., \"doi.org/10.\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
|
||||
|
||||
$varIdHdl = identifierExtract('["//dc:identifier[starts-with(., \"HDL:\") and not(starts-with(., \"HDL: http\"))][not(contains(., \"123456789\"))]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/hdl/\") or (starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/url/\") and contains(., \"://hdl.handle.net/\"))]"]' , xpath:"./*[local-name()='record']" , '(?!(info:hdl:|://hdl.handle.net/|info:eu-repo/semantics/altIdentifier/hdl/))(\d.*)');
|
||||
|
||||
|
||||
$varIdIsbn = xpath:"(//dc:identifier, //dc:source)[starts-with(lower-case(.), 'isbn') or starts-with(., '978') or starts-with(., '979')][(matches(., '(isbn[:\s]*)?97[89]-\d+-\d+-\d+-\d+$', 'i') and string-length(concat('97', substring-after(., '97'))) = 17) or matches(., '(isbn[:\s]*)?97[89]\d{10}$', 'i')]/replace(., 'isbn[:\s]*', '', 'i'), //dc:relation[starts-with(lower-case(.), 'info:eu-repo/semantics/altidentifier/isbn/')][(matches(., 'info:eu-repo/semantics/altIdentifier/isbn/97[89]-\d+-\d+-\d+-\d+$', 'i') and string-length(.) = 59) or matches(., 'info:eu-repo/semantics/altidentifier/isbn/97[89]\d{10}$', 'i')]/substring-after(lower-case(.), 'info:eu-repo/semantics/altidentifier/isbn/')";
|
||||
|
||||
$varIdBibc = identifierExtract('["//dc:identifier[starts-with(., \"BibCode:\") or starts-with(., \"BIBCODE:\") or (starts-with(., \"http:\") and contains(., \"bibcode=\"))]"]' , xpath:"./*[local-name()='record']" , '(^(BibCode:|BIBCODE:|http).*$)');
|
||||
|
||||
$varIdPtnt = identifierExtract('["//dc:identifier[starts-with(., \"Patent N°:\")]"]' , xpath:"./*[local-name()='record']" , '(^Patent N°:.*$)');
|
||||
|
||||
$varPmId = identifierExtract('["//dc:identifier[starts-with(normalize-space(.), \"PUBMED:\")]"]' , xpath:"./*[local-name()='record']" , '(?!PUBMED: )(\d+)');
|
||||
|
||||
$varIdPmc = identifierExtract('["//dc:identifier[starts-with(., \"PUBMEDCENTRAL:\") or (starts-with(., \"http\") and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\"))]", "//dc:relation[starts-with(., \"info:eu-repo/semantics/altIdentifier/pmid/PMC\") or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/pmid/http\")) and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\"))]"]' , xpath:"./*[local-name()='record']" , '(PMC\d+)');
|
||||
|
||||
//$varIdHal = identifierExtract('["//dc:identifier[starts-with(., \"ads-\") or starts-with(., \"anses-\") or starts-with(., \"artxibo-\") or starts-with(., \"bioemco-\") or starts-with(., \"cea-\") or starts-with(., \"cel-\") or starts-with(., \"cirad-\") or starts-with(., \"edutice-\") or starts-with(., \"emse-\") or starts-with(., \"EMSE-\") or starts-with(., \"ensl-\") or starts-with(., \"hal-\") or starts-with(., \"HAL-\") or starts-with(., \"halsde-\") or starts-with(., \"halshs-\") or starts-with(., \"hprints-\") or starts-with(., \"in2p3-\") or starts-with(., \"ineris-\") or starts-with(., \"inria-\") or starts-with(., \"Inria-\") or starts-with(., \"inserm-\") or starts-with(., \"insu-\") or starts-with(., \"INSU-\") or starts-with(., \"ird-\") or starts-with(., \"irsn-\") or starts-with(., \"jpa-\") or starts-with(., \"lirmm-\") or starts-with(., \"medihal-\") or starts-with(., \"meteo-\") or starts-with(., \"mnhn-\") or starts-with(., \"obspm-\") or starts-with(., \"pastel-\") or starts-with(., \"pasteur-\") or starts-with(., \"Pasteur-\") or starts-with(., \"peer-\") or starts-with(., \"ssa-\") or starts-with(., \"tel-\") or starts-with(., \"ujm-\") or starts-with(., \"ijn_\") or starts-with(., \"sic_\") or (starts-with(., \"http\") and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\") or contains(., \"://medihal.archives-ouvertes.fr/hal\")))]", "//dc:relation[((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\")) and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\")))]"]' , xpath:"./*[local-name()='record']" , 
'((ads|anses|artxibo|bioemco|cea|cel|cirad|edutice|emse|EMSE|ensl|hal|HAL|halsde|halshs|hprints|in2p3|ineris|inria|Inria|inserm|insu|INSU|ird|irsn|jpa|lirmm|medihal|meteo|mnhn|obspm|pastel|pasteur|Pasteur|peer|ssa|tel|ujm)-|(ijn|sic)_).*');
|
||||
$varIdHal = identifierExtract('["//*[local-name() = \"recordIdentifier\"]"]' , xpath:"./*[local-name()='record']" , '(oai:HAL:.*)');
|
||||
|
||||
$varIdArxv = identifierExtract('["//dc:identifier[((starts-with(., \"http\") or starts-with(., \"ArXiv: http\")) and (contains(., \"://arxiv.org/abs/\") or contains(., \"://arxiv.org/pdf/\"))) or starts-with(., \"arXiv:\") or starts-with(., \"ARXIV:\")]", "//dc:relation[(starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/arxiv/\") and not(contains(., \"/arxiv/http\"))) or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/arxiv/http\")) and (contains(., \"://arxiv.org/abs/\") or contains(., \"://arxiv.org/pdf/\")))]"]' , xpath:"./*[local-name()='record']" , '(?!(://arxiv.org/abs/|:eu-repo/semantics/altIdentifier/arxiv/))([a-zA-Z].*)');
|
||||
|
||||
$varIdWos = identifierExtract('["//dc:identifier[starts-with(., \"WOS:\") or starts-with(., \"wos: WOS:\")]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/wos/\")]"]' , xpath:"./*[local-name()='record']" , '(info.*|WOS:.+|wos: WOS:.+)');
|
||||
|
||||
//oaf:identifier = set(xpath:"$varId//value[not[. = '10.1145/nnnnnnn.nnnnnnn']]", @identifierType = "doi";);
|
||||
oaf:identifier = set(xpath:"$varIdDoi//value[not(. = '10.1145/nnnnnnn.nnnnnnn')]", @identifierType = "doi";);
|
||||
oaf:identifier = set(xpath:"$varIdHdl//value", @identifierType = "handle";);
|
||||
oaf:identifier = set(xpath:"$varIdIsbn", @identifierType = "isbn";);
|
||||
|
||||
oaf:identifier = set(xpath:"($varIdBibc//value[not(starts-with(., 'http'))]/replace(., 'BIBCODE:\s*', ''), $varIdBibc//value[starts-with(., 'http') and contains(substring-after(., 'bibcode='), codepoints-to-string(38))]/substring-before(substring-after(., 'bibcode='), codepoints-to-string(38)), $varIdBibc//value[starts-with(., 'http') and not(contains(substring-after(., 'bibcode='), codepoints-to-string(38)))]/substring-after(., 'bibcode='))", @identifierType = "bibcode";);
|
||||
|
||||
oaf:identifier = set(xpath:"$varIdPtnt//value/normalize-space(substring-after(., 'Patent N°:'))", @identifierType = "patentNumber";);
|
||||
|
||||
oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";);
|
||||
oaf:identifier = set(xpath:"$varIdPmc//value", @identifierType = "pmcid";);
|
||||
//oaf:identifier = set(xpath:"distinct-values($varIdHal//value/replace(., '(/document|/image|/file/.*)$', ''))", @identifierType = "hal";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdHal//value/replace(substring-after(., 'oai:HAL:'), '(v\d*)$', ''))", @identifierType = "hal";);
|
||||
oaf:identifier = set(xpath:"distinct-values(($varIdArxv//value/normalize-space(replace(., '(https?://arxiv.org/abs/|https?://arxiv.org/pdf/|info:eu-repo/semantics/altIdentifier/arxiv/|info:eu-repo/semantics/altIdentifier/url/|info:eu-repo/semantics/altIdentifier/urn/|arXiv:|\.pdf)', '', 'i'))))", @identifierType = "arxiv";);
|
||||
oaf:identifier = set(xpath:"$varIdWos//value/normalize-space(replace(., '(info:eu-repo/semantics/altIdentifier/wos/|WOS:|wos:)', ''))", @identifierType = "wos";);
|
||||
|
||||
oaf:identifier = set(xpath:"distinct-values(//dc:identifier[starts-with(., 'http') and contains(., $varIdHal//value/replace(substring-after(., 'oai:HAL:'), '(v\d*)$', ''))]/replace(., '(/document|/image|/file/.*)$', ''))", @identifierType = "landingPage";);
|
||||
oaf:identifier = set(xpath:"distinct-values(//dc:identifier[starts-with(., 'http') and not(ends-with(., $varIdHal//value/replace(substring-after(., 'oai:HAL:'), '(v\d*)$', '')))])", @identifierType = "url";);
|
||||
|
||||
oaf:identifier = set(xpath:"//dri:recordIdentifier", @identifierType = "oai-original";);
|
||||
|
||||
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
|
||||
|
||||
// journal data
|
||||
// avoids regular expressions while a) correcting ISSNs that have no '-' or have another character in place of the '-', and b) ignoring anything after the ISSN (e.g. print/online/...)
|
||||
$varISSN = xpath:"//dc:source[starts-with(., 'ISSN:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'ISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'ISSN:')), 1, 4))))";
|
||||
//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'ISSN:'))";
|
||||
$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/concat(substring(normalize-space(substring-after(., 'EISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'EISSN:')), 1, 4))))";
|
||||
oaf:journal = set(xpath:"//oaf:datasourceprefix[$varISSN or $varEISSN]/''", @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";);
|
||||
|
||||
end
|
|
@ -0,0 +1,140 @@
|
|||
// From PROD, 2021-07-06: tf (transformation) script for DOAJ, with more than 6 million records
|
||||
declare_script "dc_cleaning_OpenAIREplus_compliant_doaj";
|
||||
declare_ns oaf = "http://namespace.openaire.eu/oaf";
|
||||
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
|
||||
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
|
||||
declare_ns dc = "http://purl.org/dc/elements/1.1/";
|
||||
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
|
||||
$var0 = "''";
|
||||
$varFP7 = "'corda_______::'";
|
||||
$varH2020 = "'corda__h2020::'";
|
||||
$varDummy = "''";
|
||||
// $varUnknownRepoId = "'openaire____::55045bd2a65019fd8e6741a755395c8c'";
|
||||
//
|
||||
$varUnknownRepoId = "'openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18'";
|
||||
$varUnknownRepoName = "'Unknown Repository'";
|
||||
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
|
||||
static $varRepoid = xpath:"//dri:repositoryId";
|
||||
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
|
||||
dri:objIdentifier = xpath:"//dri:objIdentifier";
|
||||
dri:repositoryId = $varRepoid;
|
||||
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
|
||||
|
||||
if xpath:"//dc:creator[string-length(normalize-space(.)) &gt; 0][contains(., 'CDATA')][starts-with(normalize-space(.), '(')][starts-with(normalize-space(.), '.')]" dc:creator = skipRecord(); else $varDummy = "''";
|
||||
//apply xpath:"//dc:creator" if xpath:"string-length(normalize-space(.)) &amp;gt; 0 and not(contains(., 'CDATA')) and not(starts-with(normalize-space(.), '.')) and not(starts-with(normalize-space(.), '('))" dc:creator = Convert(xpath:".", Person); else $varDummy = "''";
|
||||
if xpath:"count(//dc:creator) = 0" dc:creator = skipRecord(); else $varDummy = "''";
|
||||
//apply xpath:"//dc:creator" if xpath:"string-length(.) &gt; 0 and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
$varOrcidName = xpath:"//dc:creator[string-length(normalize-space(.)) > 0]";
|
||||
$varOrcidOrcid = xpath:"//dc:creator[string-length(normalize-space(.)) > 0]/@id/replace(., 'https?://orcid.org/', '')";
|
||||
dc:creator = set(xpath:"$varOrcidName", @nameIdentifier = xpath:"subsequence($varOrcidOrcid,position(),1)";, @nameIdentifierScheme=xpath:"replace(subsequence($varOrcidOrcid,position(),1),'^.+$','ORCID')";, @schemeUri=xpath:"replace(subsequence($varOrcidOrcid,position(),1),'^.+$','http://orcid.org/')";);
|
||||
|
||||
if xpath:"count(//dc:title[string-length(.) &gt; 0]) = 0" dc:title = skipRecord(); else $varDummy = "''";
|
||||
dc:title = xpath:"//dc:title/normalize-space(replace(., '^(&lt;title language=)(.)*(&gt;)', ''))";
|
||||
// apply xpath:"//dc:title" if xpath:"string-length(.) &gt; 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
|
||||
apply xpath:"//dc:subject" if xpath:"string-length(.) &gt; 0 and not(@xsi:type = 'dcterms:LCSH')" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
dc:subject = set(xpath:"//dc:subject[@xsi:type = 'dcterms:LCSH']/concat('lcsh:', .)", @classid=xpath:"'lcsh'";, @classname=xpath:"'lcsh'";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
|
||||
|
||||
apply xpath:"//dc:publisher" if xpath:"string-length(.) &gt; 0" dc:publisher = xpath:"normalize-space(replace(., '(&lt;br&gt;)', ''))"; else $varDummy = "''";
|
||||
apply xpath:"//dc:source" if xpath:"string-length(.) &gt; 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
dc:contributor = xpath:"//dc:contributor";
|
||||
dc:description = xpath:"//dc:description[not(starts-with(., 'URN: urn:nbn:') or starts-with(., 'URN: http'))]";
|
||||
dc:format = xpath:"//dc:format";
|
||||
$varHttpTest = "''";
|
||||
if xpath:"//dc:relation[starts-with(., 'http') or starts-with(., 'www.')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
|
||||
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
|
||||
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'www.')" dc:identifier = xpath:"concat('http://', normalize-space(.))"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
|
||||
dr:CobjIdentifier = xpath:"distinct-values(//dc:identifier[not(starts-with(normalize-space(.), 'http'))][not(normalize-space(.) = ($varIdList))][not(starts-with(normalize-space(.), 'urn:nbn:') or starts-with(normalize-space(.), 'URN:NBN:'))][not(. = ($varISSN[1], $varISSN[2]))][normalize-space(.) != ''])";
|
||||
dc:identifier = xpath:"($varIdUrl//value[not(starts-with(., 'www'))], $varIdUrl//value[starts-with(., 'www')]/concat('http://', .), $varIdLdpg//value, $varIdDoi//value)[1]";
|
||||
dc:relation = xpath:"//dc:relation[starts-with(., 'https://doaj.org/toc/')]";
|
||||
|
||||
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
|
||||
static dr:dateOfTransformation = xpath:"current-dateTime()";
|
||||
// dc:type = xpath:"//dc:type";
|
||||
dc:language = Convert(xpath:"//dc:language", Languages);
|
||||
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
|
||||
dc:date = xpath:"//dc:date";
|
||||
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
|
||||
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
|
||||
//apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
|
||||
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
|
||||
//
|
||||
oaf:collectedDatasourceid = $varDatasourceid;
|
||||
//
|
||||
// apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
|
||||
//dr:CobjCategory = "0001";
|
||||
$varCobjCategory = Convert(xpath:"//dc:type", TextTypologies);
|
||||
$varSuperType = Convert(xpath:"normalize-space($varCobjCategory)", SuperTypes);
|
||||
dr:CobjCategory = set($varCobjCategory, @type = $varSuperType;);
|
||||
dc:type = xpath:"//dc:type";
|
||||
//
|
||||
// review status
|
||||
|
||||
$varRefereedIdntf = xpath:"(//*[string(node-name(.)) = 'dc:identifier' and matches(., '^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$')]/'0001', //*[string(node-name(.)) = 'dc:relation' and matches(., '^info:eu-repo/semantics/altIdentifier/doi/10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$', 'i')]/'0001')";
|
||||
|
||||
$varRefereedProse = xpath:"(//*[string(node-name(.)) = 'dc:description' and matches(lower-case(.), '.*this\s*preprint\s*has\s*been\s*reviewed\s*and\s*recommended\s*by\s*peer\s*community') and contains(., '10.24072/')]/'0001', //dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001')";
|
||||
$varRefereedReltn = xpath:"(//dc:relation, //dc:identifier)[contains(., '://www.dovepress.com/') and matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001'";
|
||||
$varRefereedTitle = xpath:"//dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001'";
|
||||
$varRefereedDesct = xpath:"(//dc:description[matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001')";
|
||||
$varRefereed = xpath:"($varRefereedIdntf, $varRefereedProse, $varRefereedReltn, $varRefereedTitle, $varRefereedDesct)";
|
||||
//if xpath:"$varRefereed" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
|
||||
// Emit oaf:refereed = '0001' when at least one of the review-status probes gathered in $varRefereed produced '0001'.
// The else branch only assigns the scratch variable $varDummy.
if xpath:"count(index-of($varRefereed, '0001')) >0" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
|
||||
// Emit oaf:refereed = '0002' only when $varRefereed contains '0002' and no '0001'.
// NOTE(review): every $varRefereed* expression defined above yields '0001' only - confirm whether this rule can ever fire.
if xpath:"count(index-of($varRefereed, '0002')) >0 and count(index-of($varRefereed, '0001')) = 0" oaf:refereed = xpath:"'0002'"; else $varDummy= "''";
|
||||
//
|
||||
// For each dc:rights: values starting with 'info:eu-repo/semantics' are converted through the AccessRights
// vocabulary into oaf:accessrights; every other value is copied through unchanged as dc:rights.
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
|
||||
// Default: when the record carries no dc:rights starting with 'info:eu-repo/semantics', oaf:accessrights is set to "OPEN".
// When such a dc:rights exists, only the scratch variable $var0 is assigned here.
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
|
||||
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
|
||||
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
|
||||
oaf:license = xpath:"(//dc:rights, //dc:relation)[starts-with(normalize-space(.), 'http') and (contains(., '/licenses/') or contains(., '/licence/') or contains(., '/licencias/') or contains(., '/licencia/') or contains(., '://creativecommons.org/') or contains(., '://rightsstatements.org/')) or matches(., '^CC[- ]BY([- ](NC([- ](ND|SA))?|ND|SA))([- ]\d(\.\d)?)?$', 'i')][not(contains(normalize-space(.), ' '))]/normalize-space(.)";
|
||||
//
|
||||
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
||||
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
||||
//
|
||||
//$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
|
||||
$varIdDoi = identifierExtract('["//dc:identifier[starts-with(., \"10.\") or starts-with(., \"DOI:\") or starts-with(., \"doi:\") or (starts-with(., \"http\") and contains(., \"doi.org/\"))]", "//dc:relation[starts-with(., \"10.\") or starts-with(., \"DOI:\") or starts-with(., \"doi:\") or (starts-with(., \"http\") and contains(., \"doi.org/\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
|
||||
$varIdHdl = identifierExtract('["//dc:relation[starts-with(., \"http\") and contains(., \"://hdl.handle.net/\")][not(contains(., \"123456789\"))]"]' , xpath:"./*[local-name()='record']" , '(?!(://hdl.handle.net/))(\d.*)');
|
||||
$varIdUrn = identifierExtract('["//dc:relation[starts-with(., \"urn:nbn:\") or starts-with(., \"URN:NBN:\") or (starts-with(., \"http\") and (contains(., \"://nbn-resolving.org/urn:nbn:\") or contains(., \"://nbn-resolving.de/urn/resolver.pl?urn:nbn:\") or contains(., \"://nbn-resolving.de/urn:nbn:\") or contains(., \"://resolver.obvsg.at/urn:nbn:\") or contains(., \"://urn.fi/URN:NBN:\") or contains(., \"://urn.kb.se/resolve?urn=urn:nbn:\")))]", "//dc:description[contains(., \"URN: urn:nbn:de:0114-\") or contains(., \"URN: http://nbn-resolving.de/urn:nbn:de:0114-\") or (contains(., \"URN:NBN:no-\") and //dc:identifier = \"1893-1774\")]"]' , xpath:"./*[local-name()='record']" , '((urn:nbn:|URN:NBN:).*)');
|
||||
$varIdArk = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"http\") and contains(., \"/ark:\")]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
|
||||
$varIdPmid = identifierExtract('["//dc:relation[starts-with(., \"http\") and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
|
||||
$varIdPmc = identifierExtract('["//dc:relation[starts-with(., \"http\") and (contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\") or contains(., \"//europepmc.org/articles/PMC\"))]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
|
||||
$varIdHal = identifierExtract('["//dc:relation[starts-with(., \"hal-\") or starts-with(., \"halshs-\") or starts-with(., \"halsde-\") or (starts-with(., \"http\") and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\")))]"]' , xpath:"./*[local-name()='record']" , '(hal(shs|sde)?-.*)');
|
||||
$varIdArxv = identifierExtract('["//dc:relation[starts-with(., \"http\") and (contains(., \"://arxiv.org/pdf/\") or contains(., \"://arxiv.org/abs/\"))]"]' , xpath:"./*[local-name()='record']" , '(\d.*)');
|
||||
$varIdLdpg = identifierExtract('["//dc:identifier[starts-with(., \"https://doaj.org/article/\")]"]', xpath:"./*[local-name()='record']" , '(http.*)');
|
||||
$varIdUrl = identifierExtract('["//dc:relation[starts-with(., \"http\")][not(contains(., \"://doaj.org\"))][not(contains(., \"doi.org/\"))][not(contains(., \"hdl.handle.net/\"))][not(contains(., \"://nbn-resolving.de/\") or contains(., \"://nbn-resolving.org/\") or contains(., \"://resolver.obvsg.at/\") or contains(., \"://urn.fi/URN:NBN:\") or contains(., \"://urn.kb.se/resolve\"))][not(contains(., \"://arxiv.org/pdf/\") or contains(., \"://arxiv.org/abs/\"))][not(contains(., \"://localhost/\") or contains(., \"://localhost:\"))]", "//dc:relation[starts-with(., \"www\")]"]', xpath:"./*[local-name()='record']" , '((http|www).*)');
|
||||
|
||||
$varIdList = xpath:"(($varIdDoi//value, $varIdHdl//value, $varIdUrn//value, $varIdArk//value, $varIdPmid//value, $varIdPmc//value, $varIdLdpg//value, $varIdUrl//value))";
|
||||
|
||||
// dropping/cleaning wrong DOIs, as
|
||||
// 2 DOIs just different in 1 ending with . (mostly, but not exclusively, prefixed with 10.5216)
|
||||
// noise stemming from odd/wrong DOI statements' formats
|
||||
// DOIs with 2 prefixes
|
||||
// DOI statements containing first the DOI prefix and then the DOI incl. the resolver prefix
|
||||
//oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
|
||||
//oaf:identifier = set(xpath:"$varIdDoi//value", @identifierType = "doi";);
|
||||
|
||||
oaf:identifier = set(xpath:"distinct-values(($varIdDoi//value[not(ends-with(., '.') and exists(index-of($varIdDoi//value, substring(., 1, string-length(.)-1))))][not(. = '10.4313/article-4')][not(lower-case(.) = ('10.30659/ijibe.2.1.171-181', '10.30659/ijibe.2.1.171', '10.26843/rencima.v8i4.149', '10.26843/rencima.v11i1.215', '10.18273/revfue.v14n2-2016002revista', '10.17061/phrp3112015', '10.21789/24222704', '10.22432/pjsr.2017.14.', '10.22432/pjsr.2017.18.02', '10.22432/pjsr.2017.18.'))][not(starts-with(., '10.1530/VAB-'))][not(starts-with(lower-case(.), '10.1155/s168761720'))][not(starts-with(., '10.15561/10.6084/') or starts-with(., '10.5935/10.19180/'))][not(starts-with(., '10.7454/jvi.v') and string-length(.) = 16)][not(starts-with(., '10.15094/0000') and string-length(.) = 16)][not(matches(., '^10\.\d*/DOI:$'))][not(starts-with(., concat(substring-before(., '/'), '/', substring-before(., '/'), '/')))][not(matches(substring-after(., '/'), '^https?://(dx.)?doi.org/.*') and starts-with(substring-after(., 'doi.org/'), substring-before(., '/')))][not(starts-with(., '10.1371/journal.') and matches(., '^10\.1371/journal\.[a-z]{4}\.\d{7}\.(eor|20050521)$'))][not(substring-before(., '/') = ('10.19183', '10.18066') and matches(., '^(10\.19183/how\.\d*\.\d*|10\.18066/revunivap\.v\d*i\d*)$'))]/lower-case(.), $varIdDoi//value[matches(substring-after(., '/'), '^https?://(dx.)?doi.org/.*') and starts-with(substring-after(., 'doi.org/'), substring-before(., '/'))]/substring-after(., 'doi.org/'), $varIdDoi//value[starts-with(., '10.1371/journal.') and matches(., '^10\.1371/journal\.[a-z]{4}\.\d{7}\.eor$')]/substring(., 1, 28), $varIdDoi//value[starts-with(., '10.15561/10.6084/') or starts-with(., '10.5935/10.19180/')]/substring-after(., '/')))", @identifierType = "doi";);
|
||||
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdHdl//value/normalize-space(replace(., '\?locatt=view:master', '')))", @identifierType = "handle";);
|
||||
oaf:identifier = set(xpath:"$varIdUrn//value", @identifierType = "urn";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdArk//value/replace(substring-after(., '/ark:'), '^/', ''))", @identifierType = "ark";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdPmid//value/replace(., 'https?://www.ncbi.nlm.nih.gov/pmc/articles/pmid/(\d+)(/.*)?', '$1'))", @identifierType = "pmid";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdPmc//value/replace(., 'https?://(www.ncbi.nlm.nih.gov/pmc|europepmc.org)/articles/(PMC\d*)([/\?].*)?', '$2'))", @identifierType = "pmcid";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdHal//value/replace(., '/document', ''))", @identifierType = "hal";);
|
||||
oaf:identifier = set(xpath:"$varIdArxv//value", @identifierType = "arxiv";);
|
||||
oaf:identifier = set(xpath:"$varIdLdpg//value", @identifierType = "landingPage";);
|
||||
oaf:identifier = set(xpath:"($varIdUrl//value[not(starts-with(., 'www'))], $varIdUrl//value[starts-with(., 'www')]/concat('http://', .))", @identifierType = "url";);
|
||||
|
||||
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
|
||||
|
||||
//$varJournalName = xpath:"substring-before(//dc:source, ',')";
|
||||
// Journal citation parts are parsed out of dc:source using the literal markers ', Vol ', ', Iss ' and ', Pp '
// (illustrative shape: "Some Journal, Vol 12, Iss 3, Pp 45-67" - presumably the DOAJ export format; verify against real records).
// Title: text before ', Vol ', or before ', Iss '; the first hit wins.
$varJournalTitle = xpath:"(//dc:source[contains(., ', Vol ')]/substring-before(., ', Vol '), //dc:source[contains(., ', Iss ')]/substring-before(., ', Iss '))[1]";
|
||||
// Volume: the leading run of digits that follows ', Vol '.
$varVol = xpath:"//dc:source[contains(., ', Vol ')][matches(., ', Vol \d+')]/replace(substring-after(., ', Vol '), '^(\d+).*$', '$1')";
|
||||
// Issue: the leading run of digits that follows ', Iss '.
$varIss = xpath:"//dc:source[contains(., ', Iss ')][matches(., ', Iss \d+')]/replace(substring-after(., ', Iss '), '^(\d+).*$', '$1')";
|
||||
// Start page: only for sources with a ', Pp <digits>-<digits>' range; text between ', Pp ' and the first '-'.
$varSp = xpath:"//dc:source[contains(., ', Pp ')][matches(., ', Pp \d+-\d+')]/substring-before(substring-after(., ', Pp '), '-')";
|
||||
// End page: same guard as the start page; the leading run of digits after the '-' of the page range.
$varEp = xpath:"//dc:source[contains(., ', Pp ')][matches(., ', Pp \d+-\d+')]/replace(substring-after(substring-after(., ', Pp '), '-'), '^(\d+).*$', '$1')";
|
||||
// ISSN detection: a dc:identifier counts as an ISSN when it is exactly 9 characters long and has the shape
// NNNN-NNNC, where the check character C is a digit or an upper-case 'X' (the ISSN check digit may be 'X').
// The previous digits-only pattern silently dropped every ISSN ending in 'X'.
// NOTE(review): $varISSN is not referenced by any rule between here and the closing 'end'.
$varISSN = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)$')][1]";
|
||||
//oaf:journal = set($varJournalName, @issn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)')][1]"; , @eissn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)')][2]";);
|
||||
//oaf:journal = set($varJournalName, @issn = xpath:"//dc:identifier[string-length() = 9]";);
|
||||
// oaf:journal carries the parsed journal title as content; @issn / @eissn take the first / second ISSN-shaped
// dc:identifier (same pattern as above), and @vol, @iss, @sp, @ep come from the variables parsed out of dc:source.
oaf:journal = set($varJournalTitle, @issn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)$')][1]";, @eissn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)$')][2]";, @vol = xpath:"$varVol";, @iss = xpath:"$varIss";, @sp = xpath:"$varSp";, @ep = xpath:"$varEp";);
|
||||
|
||||
end
|
|
@ -0,0 +1,492 @@
|
|||
<!-- from PROD 2021-06-14 -->
|
||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.1"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:transformExt="http://namespace.openaire.eu/java/org.apache.commons.codec.digest.DigestUtils"
|
||||
xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy"
|
||||
extension-element-prefixes="transformExt TransformationFunction"
|
||||
exclude-result-prefixes="transformExt TransformationFunction" >
|
||||
<xsl:output indent="yes" omit-xml-declaration="yes"/>
|
||||
|
||||
<!--
|
||||
<xsl:param name="varHostedById" select="'opendoar____::908'"/>
|
||||
<xsl:param name="varHostedByName" select="'Europe PubMed Central'"/>
|
||||
-->
|
||||
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDsType" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'"/>
|
||||
<xsl:param name="varFP7OtherDOI" select="'10.13039/100011102'"/>
|
||||
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'"/>
|
||||
<xsl:param name="varFP7" select="'corda_______::'"/>
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'"/>
|
||||
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
|
||||
|
||||
<xsl:param name="index" select="0"/>
|
||||
<xsl:param name="transDate" select="current-dateTime()"/>
|
||||
<xsl:variable name="tf" select="TransformationFunction:getInstance()"/>
|
||||
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
|
||||
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
|
||||
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
|
||||
|
||||
<!-- Named template that aborts the transformation: it raises an xsl:message with terminate="yes".
     The root template calls it when the record has no non-empty article-title. -->
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
<metadata>
|
||||
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
|
||||
<xsl:call-template name="terminate"/>
|
||||
</xsl:if>
|
||||
<!-- in journal.fi xml:lang of translated titles is not within the trans-title element but within the surrounding trans-title-group element (which just contains 1 trans-title element) -->
|
||||
<!--
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()=('article-title', 'trans-title-group')][string-length(normalize-space(.))> 0]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:title'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="title">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name()='title-group']//*[local-name()=('article-title', 'trans-title', 'subtitle', 'trans-subtitle')]"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="authors">
|
||||
<!--
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))]"/>
|
||||
-->
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group'][@content-type='author']/*[local-name() = 'contrib']"/>
|
||||
</xsl:call-template>
|
||||
<!-- <xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()=('abstract', 'trans-abstract')]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:description'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group']//*[local-name()='kwd']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:source'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:element name="dc:language">
|
||||
<xsl:value-of select="//*[local-name()='metadata']//*[local-name()='article']/@xml:lang" />
|
||||
</xsl:element>
|
||||
<xsl:element name="dc:identifier">
|
||||
<xsl:value-of select="//*[local-name()='article-meta']/*[local-name()='self-uri'][contains(./@xlink:href, '/view/')]/@xlink:href" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:dateAccepted">
|
||||
<!--
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])" />
|
||||
|
||||
<xsl:value-of select="TransformationFunction:Convert($tf, //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub'], 'DateISO8601', 'yyyy-MM-dd', 'min()')" />
|
||||
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/replace(concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day']), '-(\d)([-$])', '-0$1$2')" />
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
|
||||
concat(./*[local-name()='year'], '-',
|
||||
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']), 2), '-',
|
||||
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']), 2))" />
|
||||
-->
|
||||
<!-- Builds year-month-day from the pub-date with date-type 'pub' and publication-format 'epub' whose year is
     4 characters long. Month and day are forced to two digits without a conditional: the value is wrapped as
     '0' + value + '1' and a 2-character window is taken starting at (string-length idiv 2) + 1, so a missing
     value gives '01', '5' gives '05' and '12' gives '12'. -->
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
|
||||
concat(./*[local-name()='year'], '-',
|
||||
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']) idiv 2 + 1, 2), '-',
|
||||
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']) idiv 2 +1, 2))" />
|
||||
|
||||
</xsl:element>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<xsl:for-each select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub']">
|
||||
<xsl:choose>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2] and ./*[local-name()='day' and string-length(normalize-space(.)) = 2]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'])"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="./*[local-name()='year']"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()='meta-value'], //*[local-name()='permissions']/*[local-name()='copyright-statement']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='permissions']/*[local-name()='license']/@xlink:href"/>
|
||||
<xsl:with-param name="targetElement" select="'oaf:license'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="identifiers">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri'][not(./@content-type = 'application/pdf')]/@xlink:href">
|
||||
<oaf:identifier>
|
||||
<xsl:attribute name="identifierType">
|
||||
<xsl:text>landingPage</xsl:text>
|
||||
</xsl:attribute>
|
||||
<xsl:value-of select="."/>
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri' and ./@content-type='application/pdf' and //oaf:datasourceprefix = ('ambientesust', 'qualityinedu')]/@xlink:href/replace(., '/view/', '/download/')">
|
||||
<oaf:fulltext>
|
||||
<xsl:value-of select="."/>
|
||||
</oaf:fulltext>
|
||||
</xsl:for-each>
|
||||
|
||||
<!-- FP7 projects: for every award-group whose institution-id ends with one of the two FP7 funder DOIs, emit one
     oaf:projectid per award-id child that is exactly six digits after whitespace normalisation.
     The loop runs over the validated award-id elements themselves, so the emitted id is always the one that
     passed the check and is written without surrounding whitespace. -->
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI) or ends-with(., $varFP7OtherDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '^\d{6}$')]">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varFP7, normalize-space(.))"/>
|
||||
</oaf:projectid>
|
||||
</xsl:for-each>
|
||||
<!-- H2020 projects: for every award-group whose institution-id ends with the H2020 funder DOI, emit one
     oaf:projectid per award-id child that is exactly six digits after whitespace normalisation.
     The loop runs over the validated award-id elements themselves, so the emitted id is always the one that
     passed the check and is written without surrounding whitespace. -->
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '^\d{6}$')]">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varH2020, normalize-space(.))"/>
|
||||
</oaf:projectid>
|
||||
</xsl:for-each>
|
||||
|
||||
<!-- -->
|
||||
<!-- Candidate access-right codes for the record. Inputs, in order: the meta-value / meta-name of every custom-meta
     with specific-use 'access-right'; every license href under article-meta/permissions; the literal 'open' for a
     free_to_read element that has neither a start_date in the future nor an end_date in the past; the literal
     'embargo' for a free_to_read element that has a future start_date or a past end_date.
     max((string(.), '0001-01-01')) substitutes '0001-01-01' when the attribute value is empty, before xs:date is applied.
     Each input is whitespace-normalised, passed to convertString with the vocabulary name 'AccessRights',
     and the results are de-duplicated with distinct-values. -->
<xsl:variable name='varRights' select="distinct-values((for $i in (
|
||||
//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href,
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
|
||||
and not( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
|
||||
and not( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())])]/'open',
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
|
||||
and (( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
|
||||
or ( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())]))]/'embargo')
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'AccessRights')))" />
|
||||
|
||||
<!--
|
||||
and not((xs:date( max( (start_date, '0001-01-01') ) ) gt current-date()))
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read' and and not((xs:date( max( (./@start_date, '0001-01-01') ) ) gt current-date()))]/'open'
|
||||
-->
|
||||
|
||||
<!-- Reduces $varRights to a single oaf:accessrights value, by priority:
     'EMBARGO' if it occurs at all; otherwise the first value that is not 'UNKNOWN';
     otherwise the first value of $varRights (which yields an empty element when $varRights is empty). -->
<oaf:accessrights>
|
||||
<xsl:choose>
|
||||
<xsl:when test="$varRights[. = 'EMBARGO']">
|
||||
<xsl:value-of select="'EMBARGO'"/>
|
||||
</xsl:when>
|
||||
<xsl:when test="$varRights[. != 'UNKNOWN']">
|
||||
<xsl:value-of select="$varRights[. != 'UNKNOWN'][1]"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="$varRights[1]"/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:accessrights>
|
||||
|
||||
<!--
|
||||
<oaf:accessrights>
|
||||
<xsl:value-of select="$varRights[1]"/>
|
||||
</oaf:accessrights>
|
||||
|
||||
<xsl:element name="oaf:accessrights">
|
||||
<xsl:value-of select="(//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href)/TransformationFunction:convertString($tf, ., 'AccessRights')" />
|
||||
</xsl:element>
|
||||
-->
|
||||
|
||||
<!--
|
||||
<xsl:element name="dr:CobjCategory">
|
||||
<xsl:variable name='varCobjCategory' select="TransformationFunction:convertString($tf, //*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()='meta-value'], 'TextTypologies')" />
|
||||
<xsl:variable name='varSuperType' select="TransformationFunction:convertString($tf, $varCobjCategory, 'SuperTypes')" />
|
||||
<xsl:attribute name="type" select="$varSuperType"/>
|
||||
<xsl:value-of select="$varCobjCategory" />
|
||||
</xsl:element>
|
||||
|
||||
<xsl:variable name='varCobjCatLst' select="for $i in (
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')" />
|
||||
-->
|
||||
|
||||
<!-- varTypLst: distinct raw type statements of the record, taken from the meta-value / meta-name of custom-meta
     elements with specific-use 'resource-type' and from the article-type attribute of the article element. -->
<xsl:variable name='varTypLst' select="distinct-values((//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type))"/>
|
||||
<!-- varCobjCatLst: each raw type converted through convertString with the 'TextTypologies' vocabulary name, de-duplicated. -->
<xsl:variable name='varCobjCatLst' select="distinct-values((for $i in $varTypLst
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')))" />
|
||||
<!-- varCobjSupLst: one string per converted code, of the form code###supertype, where the supertype is the
     code converted through convertString with the 'SuperTypes' vocabulary name. '###' is the separator that the
     dr:CobjCategory selection splits on with substring-before / substring-after. -->
<xsl:variable name='varCobjSupLst' select="for $i in $varCobjCatLst
|
||||
return concat($i, '###', TransformationFunction:convertString($tf, normalize-space($i), 'SuperTypes'))" />
|
||||
<!-- Chooses one code###supertype entry from $varCobjSupLst and writes the code as element content and the
     supertype as the type attribute. The first matching rule wins:
       1. first entry whose supertype is not 'other' and whose code is none of '0038', '0039', '0040';
       2. first entry whose supertype is not 'other';
       3. first entry whose code is neither '0020' nor '0000';
       4. first entry whose code is not '0000';
       5. fallback: type 'other' with code '0000'. -->
<dr:CobjCategory>
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other')]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other')][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0000'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0000'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:attribute name="type" select="'other'"/>
|
||||
<xsl:value-of select="'0000'" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</dr:CobjCategory>
|
||||
|
||||
<!--
|
||||
<xsl:for-each select="$varCobjSupLst">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
-->
|
||||
|
||||
<xsl:for-each select="$varTypLst">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
|
||||
<!--
|
||||
<xsl:for-each select="(//*[local-name()='article']/@article-type, //*[local-name() = 'custom-meta' and ./@specific-use = 'resource-type']/*[local-name() = ('meta-value', 'meta-name')])">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
-->
|
||||
|
||||
<oaf:language>
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, //*[local-name()='metadata']//*[local-name()='article']/@xml:lang, 'Languages')" />
|
||||
</oaf:language>
|
||||
|
||||
<!-- review status -->
|
||||
<!-- ToDo:
|
||||
review status
|
||||
~ ask Journal.fi to put it elsewhere
|
||||
~ evaluate article-version (no example found yet)
|
||||
subject/kwd:
|
||||
~ handle thesauri (no example found yet)
|
||||
relations:
|
||||
~ handle fn (no example found yet)
|
||||
-->
|
||||
<!--
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in (
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
-->
|
||||
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in ($varTypLst)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
<xsl:variable name="varRefereedDescp" select="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]/'0001'"/>
|
||||
<xsl:variable name="varRefereedSubjt" select="//*[local-name() = 'article-categories' and contains(//dri:recordIdentifier, 'oai:journal.fi')]/*[local-name() = 'subj-group' and ./@subj-group-type='heading']/*[local-name() = 'subject' and . = 'Peer reviewed articles']/'0001'"/>
|
||||
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedDescp, $varRefereedSubjt)"/>
|
||||
<!--
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="$varRefereedDescp"/>
|
||||
</oaf:refereed>
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="$varRefereed"/>
|
||||
</oaf:refereed>
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="count($varRefereed[. = '0001']) > 0"/>
|
||||
</oaf:refereed>
|
||||
-->
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereed[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereed[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
|
||||
<xsl:call-template name="journal">
|
||||
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']"/>
|
||||
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']"/>
|
||||
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']"/>
|
||||
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']"/>
|
||||
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']"/>
|
||||
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']"/>
|
||||
</xsl:call-template>
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
|
||||
</xsl:template>
|
||||
|
||||
<!-- Named template "allElements": for each item of $sourceElement, writes one element whose
     name is given by $targetElement and whose content is the whitespace-normalised string
     value of the item. When the target is dc:title, dc:description or dc:subject, an xml:lang
     attribute is copied from the item, or from its parent when the item has none. -->
<xsl:template name="allElements">
    <xsl:param name="sourceElement"/>
    <xsl:param name="targetElement"/>
    <xsl:for-each select="$sourceElement">
        <!-- language of the item itself takes precedence over the parent's -->
        <xsl:variable name="lang" select="(./@xml:lang, ../@xml:lang)[1]"/>
        <xsl:element name="{$targetElement}">
            <xsl:if test="exists($lang) and $targetElement = ('dc:title', 'dc:description', 'dc:subject')">
                <xsl:attribute name="xml:lang" select="$lang"/>
            </xsl:if>
            <xsl:value-of select="normalize-space(.)"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!-- Named template "title": writes one dc:title per item of $sourceElement. The text is the
     normalised title followed by any following-sibling subtitle / trans-subtitle elements,
     joined with ': '. An xml:lang attribute is taken from the item or, failing that, from
     its parent. -->
<xsl:template name="title">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <xsl:variable name="lang" select="(./@xml:lang, ../@xml:lang)[1]"/>
        <!-- the title itself first, then its subtitles in document order -->
        <xsl:variable name="parts" select="for $part in (., ./following-sibling::*[local-name() = ('subtitle', 'trans-subtitle')]) return normalize-space($part)"/>
        <xsl:element name="dc:title">
            <xsl:if test="exists($lang)">
                <xsl:attribute name="xml:lang" select="$lang"/>
            </xsl:if>
            <xsl:value-of select="string-join($parts, ': ')"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!-- Named template "journal": writes a single oaf:journal element. The ISSN, e-ISSN, volume,
     issue, start page and end page parameters become whitespace-normalised attributes
     (always written, even when empty); the normalised journal title is the element content. -->
<xsl:template name="journal">
    <xsl:param name="journalTitle"/>
    <xsl:param name="issn"/>
    <xsl:param name="eissn"/>
    <xsl:param name="vol"/>
    <xsl:param name="issue"/>
    <xsl:param name="sp"/>
    <xsl:param name="ep"/>
    <xsl:element name="oaf:journal">
        <xsl:attribute name="issn" select="normalize-space($issn)"/>
        <xsl:attribute name="eissn" select="normalize-space($eissn)"/>
        <xsl:attribute name="vol" select="normalize-space($vol)"/>
        <xsl:attribute name="iss" select="normalize-space($issue)"/>
        <xsl:attribute name="sp" select="normalize-space($sp)"/>
        <xsl:attribute name="ep" select="normalize-space($ep)"/>
        <xsl:value-of select="normalize-space($journalTitle)"/>
    </xsl:element>
</xsl:template>
|
||||
|
||||
|
||||
<!-- Named template "identifiers": writes an oaf:identifier of type "doi" for the first
     non-blank item of $sourceElement whose @pub-id-type is 'doi'. Nothing is written when
     no such item exists.
     The previous test applied string-length() to the whole filtered sequence, which is a
     type error as soon as the input carries more than one DOI; selecting a single item
     first avoids that. The emitted value is whitespace-normalised. -->
<xsl:template name="identifiers">
    <xsl:param name="sourceElement"/>
    <xsl:variable name="doi" select="($sourceElement[@pub-id-type='doi'][string-length(normalize-space(.)) gt 0])[1]"/>
    <xsl:if test="exists($doi)">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>doi</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="normalize-space($doi)"/>
        </xsl:element>
    </xsl:if>
</xsl:template>
|
||||
|
||||
|
||||
<!-- Named template "authors": writes one dc:creator per item of $sourceElement, with the
     text "surname, given-names" taken from the item's name child.
     When the item has an ORCID contrib-id, the ORCID attributes are added. The identifier
     is obtained by stripping an optional http:// or https:// orcid.org prefix, so that
     https URLs and bare iDs no longer produce an empty nameIdentifier (the previous
     substring-after on 'http://orcid.org/' returned '' for both). The attributes are
     skipped when no identifier text remains. -->
<xsl:template name="authors">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <!-- first ORCID contrib-id only; normalize-space() of an empty sequence yields '' -->
        <xsl:variable name="orcid" select="replace(normalize-space((./*[local-name()='contrib-id'][@contrib-id-type='orcid'])[1]), '^https?://(www\.)?orcid\.org/', '', 'i')"/>
        <xsl:element name="dc:creator">
            <xsl:if test="string-length($orcid) gt 0">
                <xsl:attribute name="nameIdentifierScheme">
                    <xsl:text>ORCID</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="schemeURI">
                    <xsl:text>http://orcid.org/</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="nameIdentifier">
                    <xsl:value-of select="$orcid"/>
                </xsl:attribute>
            </xsl:if>
            <xsl:value-of select="concat(normalize-space(./*[local-name()='name']/*[local-name()='surname']), ', ', normalize-space(./*[local-name()='name']/*[local-name()='given-names']))"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
|
||||
|
||||
<!-- Template rule for elements whose local name is 'header': shallow-copies the element,
     applies templates to its child nodes and attributes, then appends a
     dr:dateOfTransformation element containing the value of $transDate. -->
<xsl:template match="//*[local-name() = 'header']">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*"/>
|
||||
<xsl:element name="dr:dateOfTransformation">
|
||||
<xsl:value-of select="$transDate"/>
|
||||
</xsl:element>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Identity rule: shallow-copies any node or attribute and recurses into its child nodes
     and attributes. It applies to whatever is reached through xsl:apply-templates and is
     not matched by a more specific rule. -->
<xsl:template match="node()|@*">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*"/>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
|
@ -0,0 +1,437 @@
|
|||
<!-- from production 2021-0614 -->
|
||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.1"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||
xmlns:transformExt="http://namespace.openaire.eu/java/org.apache.commons.codec.digest.DigestUtils"
|
||||
xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy"
|
||||
extension-element-prefixes="transformExt TransformationFunction"
|
||||
exclude-result-prefixes="transformExt TransformationFunction" >
|
||||
<xsl:output indent="yes" omit-xml-declaration="yes"/>
|
||||
|
||||
<xsl:param name="varHostedById" select="'opendoar____::908'"/>
|
||||
<xsl:param name="varHostedByName" select="'Europe PubMed Central'"/>
|
||||
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDsType" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'"/>
|
||||
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'"/>
|
||||
<xsl:param name="varFP7" select="'corda_______::'"/>
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'"/>
|
||||
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
|
||||
|
||||
<xsl:param name="index" select="0"/>
|
||||
<xsl:param name="transDate" select="current-dateTime()"/>
|
||||
<xsl:variable name="tf" select="TransformationFunction:getInstance()"/>
|
||||
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
|
||||
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
|
||||
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
|
||||
|
||||
<!-- Named template "terminate": emits an xsl:message with terminate="yes", which stops the
     transformation. The root template calls it when no non-empty article-title is found. -->
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
<metadata>
|
||||
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
|
||||
<xsl:call-template name="terminate"/>
|
||||
</xsl:if>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:title'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="authors">
|
||||
<!--
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'contrib'][@contrib-type='author']"/>
|
||||
-->
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))][./*[local-name()='name'] or ./*[local-name()='name-alternatives']/*[local-name()='name']][string-length(.//*[local-name()='surname']) + string-length(.//*[local-name()='given-names']) > 0]"/>
|
||||
</xsl:call-template>
|
||||
<!-- <xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()='abstract']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:description'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group' and not(lower-case(@kwd-group-type)=('mesh', 'ocis'))]//*[local-name()='kwd']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='mesh' and ./*[local-name()='kwd']]">
|
||||
<xsl:for-each select="./*[local-name()='kwd']">
|
||||
<dc:subject>
|
||||
<xsl:attribute name="subjectScheme" select="'mesh'"/>
|
||||
<xsl:attribute name="schemeURI" select="'http://www.nlm.nih.gov/mesh/'"/>
|
||||
<xsl:attribute name="valueURI" select="''"/>
|
||||
<xsl:value-of select="./concat('mesh:', replace(., 'mesh (.*)$', '$1'))"/>
|
||||
</dc:subject>
|
||||
</xsl:for-each>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='ocis' and ./*[local-name()='kwd']]">
|
||||
<xsl:for-each select="./*[local-name()='kwd']">
|
||||
<dc:subject>
|
||||
<xsl:attribute name="subjectScheme" select="'ocis'"/>
|
||||
<xsl:attribute name="schemeURI" select="''"/>
|
||||
<xsl:attribute name="valueURI" select="''"/>
|
||||
<xsl:value-of select="./concat('ocis:', .)"/>
|
||||
</dc:subject>
|
||||
</xsl:for-each>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:source'"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/(*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version'], *[local-name() = 'article-version'])/concat('article-version (', @article-version-type, ') ', .)"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:source'"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:element name="dc:language">
|
||||
<xsl:text>eng</xsl:text>
|
||||
</xsl:element>
|
||||
<xsl:element name="dc:identifier">
|
||||
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:fulltext">
|
||||
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
|
||||
</xsl:element>
|
||||
<!-- oaf:dateAccepted: when an electronic publication date is present, the value is built
     from the global $year and $month, with the month falling back to '01' when $month is
     not a number; otherwise the print publication year with '-01-01' is used.
     NOTE(review): the day is always written as '01'; the global $day variable is not used
     here. Confirm that this is intended. -->
<xsl:element name="oaf:dateAccepted">
    <xsl:choose>
        <xsl:when test="//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub'] or //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']">
            <xsl:choose>
                <xsl:when test="string(number($month)) eq 'NaN'">
                    <xsl:value-of select="concat($year, '-01-01')"/>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:value-of select="concat($year, '-', $month, '-01')"/>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:when>
        <xsl:otherwise>
            <xsl:value-of select="concat(//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='ppub']/*[local-name()='year'], '-01-01')"/>
        </xsl:otherwise>
    </xsl:choose>
</xsl:element>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='copyright-statement'])"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='license'])"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="identifiers">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<!-- FP7 projects: for every award-group that carries an institution-id ending with the FP7
     FundRef DOI, write one oaf:projectid per award-id consisting of exactly six digits.
     Iterating over the matching award-id elements (rather than testing the group and then
     concatenating all of its award-ids) keeps non-matching or multiple award-ids out of
     concat(), and the emitted id is whitespace-normalised just like the tested value. -->
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
    <oaf:projectid>
        <xsl:value-of select="concat($varFP7, normalize-space(.))"/>
    </oaf:projectid>
</xsl:for-each>
|
||||
<!-- H2020 projects: for every award-group that carries an institution-id ending with the
     H2020 FundRef DOI, write one oaf:projectid per award-id consisting of exactly six
     digits. Iterating over the matching award-id elements (rather than testing the group
     and then concatenating all of its award-ids) keeps non-matching or multiple award-ids
     out of concat(), and the emitted id is whitespace-normalised just like the tested
     value. -->
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
    <oaf:projectid>
        <xsl:value-of select="concat($varH2020, normalize-space(.))"/>
    </oaf:projectid>
</xsl:for-each>
|
||||
|
||||
<xsl:element name="oaf:accessrights">
|
||||
<xsl:text>OPEN</xsl:text>
|
||||
</xsl:element>
|
||||
|
||||
<xsl:element name="dr:CobjCategory">
|
||||
<xsl:attribute name="type" select="'publication'"/>
|
||||
<xsl:text>0001</xsl:text>
|
||||
</xsl:element>
|
||||
|
||||
<dc:type>
|
||||
<xsl:value-of select="//*[local-name() = 'article']/@article-type"/>
|
||||
</dc:type>
|
||||
|
||||
<!-- custom-meta perhaps not used for types, then drop
|
||||
<xsl:variable name='varTypLst' select="distinct-values((//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type))"/>
|
||||
<xsl:variable name='varTypLst' select="//*[local-name() = 'article']/@article-type"/>
|
||||
-->
|
||||
<!-- perhaps ensure that file indeed exists, e.g. as pdf etc -->
|
||||
<!--
|
||||
// reduce load for the big PubMed records by exchanging variables with choose
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in distinct-values((//*[local-name() = 'article']/@article-type, //oai:setSpec))
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
<xsl:variable name="varRefereedDescp" select="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]/'0001'"/>
|
||||
<xsl:variable name="varRefereedFnote" select="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = ('fn-group', 'notes')][
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*review\s*information.*') or
|
||||
matches(lower-case(.), '.*the\s*peer[\.\-_/\s\(\)]*review\s*history\s*for\s*this\s*article\s*is\s*available\s*at .*') or
|
||||
matches(lower-case(.), '.*provenance\s*and\s*peer[\.\-_/\s\(\)]*review.*') or
|
||||
matches(lower-case(.), '.*externally\s*peer[\.\-_/\s\(\)]*reviewed.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*reviewed\s*by.*') or
|
||||
matches(lower-case(.), '.*refereed\s*anonymously.*') or
|
||||
matches(lower-case(.), '.*peer\s*reviewer\s*reports\s*are\s*available.*')
|
||||
]/'0001'"/>
|
||||
<xsl:variable name="varRefereedReviw" select="//*[local-name() = ('article-meta', 'app', 'app-group')]/*[local-name() = 'supplementary-material']/*[local-name() = 'media'][
|
||||
matches(lower-case(.), '.*peer\s*review\s*file.*')]/'0001'"/>
|
||||
<xsl:variable name="varRefereedReltn" select="//*[local-name() = ('related-article')][./@related-article-type = ('peer-reviewed-article', 'reviewed-article')]/'0002'"/>
|
||||
<xsl:variable name="varRefereedCtRol" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']
|
||||
[./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or
|
||||
./*[local-name() = 'contrib'][./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or ./*[local-name() = 'role' and lower-case(.) = ('reviewer', 'solicited external reviewer')] or ./@contrib-type/lower-case(.) = 'reviewer']]/'0001'"/>
|
||||
<xsl:variable name="varRefereedVersn" select="//*[local-name() = 'article-meta'][./*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version' and . = 'preprint'] or ./*[local-name() = 'article-version' and . = 'preprint']]/'0002'"/>
|
||||
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedDescp, $varRefereedFnote, $varRefereedReviw, $varRefereedReltn, $varRefereedCtRol, $varRefereedVersn)"/>
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereed[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereed[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
-->
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in distinct-values((//*[local-name() = 'article']/@article-type, //oai:setSpec))
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereedConvt[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'article-id'][@pub-id-type='doi'][matches(., '^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = ('fn-group', 'notes')][
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*review\s*information.*') or
|
||||
matches(lower-case(.), '.*the\s*peer[\.\-_/\s\(\)]*review\s*history\s*for\s*this\s*article\s*is\s*available\s*at .*') or
|
||||
matches(lower-case(.), '.*provenance\s*and\s*peer[\.\-_/\s\(\)]*review.*') or
|
||||
matches(lower-case(.), '.*externally\s*peer[\.\-_/\s\(\)]*reviewed.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*reviewed\s*by.*') or
|
||||
matches(lower-case(.), '.*refereed\s*anonymously.*') or
|
||||
matches(lower-case(.), '.*peer\s*reviewer\s*reports\s*are\s*available.*') or
|
||||
matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\].*') or
|
||||
matches(lower-case(.), '.*\[.*referees\s*:.*\].*') or
|
||||
matches(lower-case(.), '^\s*plagiarism[\s\-\._]check.*') or
|
||||
matches(lower-case(.), '^\s*peer[\s\-\._]*review.*') or
|
||||
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)reviewer.*') or
|
||||
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)review\s*reports?.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = ('article-meta', 'app', 'app-group')]/*[local-name() = 'supplementary-material']/*[local-name() = 'media'][
|
||||
matches(lower-case(.), '.*peer\s*review\s*file.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']
|
||||
[./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or
|
||||
./*[local-name() = 'contrib'][./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or ./*[local-name() = 'role' and lower-case(.) = ('reviewer', 'solicited external reviewer')] or ./@contrib-type/lower-case(.) = 'reviewer']]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereedConvt[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = ('related-article')][./@related-article-type = ('peer-reviewed-article', 'reviewed-article')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta'][./*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version' and . = 'preprint'] or ./*[local-name() = 'article-version' and . = 'preprint']]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
|
||||
|
||||
<xsl:call-template name="journal">
|
||||
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']"/>
|
||||
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']"/>
|
||||
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']"/>
|
||||
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']"/>
|
||||
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']"/>
|
||||
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']"/>
|
||||
</xsl:call-template>
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varHostedByName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varHostedById"/>
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
|
||||
<xsl:for-each select="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = 'fn-group']/*[local-name() = 'fn'][matches(lower-case(.), 'country(/territory)? of origin:?\s*[A-Za-z\-]+')]">
|
||||
<oaf:country>
|
||||
<!--
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, replace(lower-case(.), '^(.|\s)*country(/territory)? of origin:?\s+([A-Za-z\-,\(\)]+(\s+[A-Za-z\-,\(\)]+)*)(.|\s)*$', '$3'), 'Countries')"/>
|
||||
-->
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(substring(substring-after(lower-case(.), 'of origin'), 2)), 'Countries')"/>
|
||||
</oaf:country>
|
||||
</xsl:for-each>
|
||||
|
||||
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
|
||||
</xsl:template>
|
||||
|
||||
<!-- Named template "allElements": for each item of $sourceElement, writes one element named
     by $targetElement containing the whitespace-normalised string value of that item. -->
<xsl:template name="allElements">
    <xsl:param name="sourceElement"/>
    <xsl:param name="targetElement"/>
    <xsl:for-each select="$sourceElement">
        <xsl:variable name="text" select="normalize-space(.)"/>
        <xsl:element name="{$targetElement}">
            <xsl:value-of select="$text"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!-- Named template "journal": produces one oaf:journal element whose attributes issn, eissn,
     vol, iss, sp and ep hold the whitespace-normalised parameter values (written even when
     empty) and whose text content is the normalised journal title. -->
<xsl:template name="journal">
    <xsl:param name="journalTitle"/>
    <xsl:param name="issn"/>
    <xsl:param name="eissn"/>
    <xsl:param name="vol"/>
    <xsl:param name="issue"/>
    <xsl:param name="sp"/>
    <xsl:param name="ep"/>
    <xsl:element name="oaf:journal">
        <xsl:attribute name="issn" select="normalize-space($issn)"/>
        <xsl:attribute name="eissn" select="normalize-space($eissn)"/>
        <xsl:attribute name="vol" select="normalize-space($vol)"/>
        <xsl:attribute name="iss" select="normalize-space($issue)"/>
        <xsl:attribute name="sp" select="normalize-space($sp)"/>
        <xsl:attribute name="ep" select="normalize-space($ep)"/>
        <xsl:value-of select="normalize-space($journalTitle)"/>
    </xsl:element>
</xsl:template>
|
||||
|
||||
|
||||
<!-- Named template "identifiers": writes oaf:identifier elements of type doi, pmc and pmid
     from the items of $sourceElement whose @pub-id-type is 'doi', 'pmcid' and 'pmid'
     respectively. Each identifier is written only when a non-blank value exists, so records
     lacking one of the ids no longer get an empty oaf:identifier element; when several
     candidates exist the first non-blank one is used. -->
<xsl:template name="identifiers">
    <xsl:param name="sourceElement"/>
    <xsl:variable name="doi" select="($sourceElement[@pub-id-type='doi'][string-length(normalize-space(.)) gt 0])[1]"/>
    <xsl:variable name="pmcid" select="($sourceElement[@pub-id-type='pmcid'][string-length(normalize-space(.)) gt 0])[1]"/>
    <xsl:variable name="pmid" select="($sourceElement[@pub-id-type='pmid'][string-length(normalize-space(.)) gt 0])[1]"/>
    <xsl:if test="exists($doi)">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>doi</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="$doi"/>
        </xsl:element>
    </xsl:if>
    <xsl:if test="exists($pmcid)">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>pmc</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="$pmcid"/>
        </xsl:element>
    </xsl:if>
    <xsl:if test="exists($pmid)">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>pmid</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="$pmid"/>
        </xsl:element>
    </xsl:if>
</xsl:template>
|
||||
|
||||
|
||||
<!-- Named template "authors": writes one dc:creator per item of $sourceElement, with the
     text "surname, given-names". The name is read from the item's name child or from a
     name nested in name-alternatives.
     When the item has an ORCID contrib-id, the ORCID attributes are added. The identifier
     is obtained by stripping an optional http:// or https:// orcid.org prefix, so that
     https URLs and bare iDs no longer produce an empty nameIdentifier (the previous
     substring-after on 'http://orcid.org/' returned '' for both). The attributes are
     skipped when no identifier text remains. -->
<xsl:template name="authors">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <!-- first ORCID contrib-id only; normalize-space() of an empty sequence yields '' -->
        <xsl:variable name="orcid" select="replace(normalize-space((./*[local-name()='contrib-id'][@contrib-id-type='orcid'])[1]), '^https?://(www\.)?orcid\.org/', '', 'i')"/>
        <xsl:element name="dc:creator">
            <xsl:if test="string-length($orcid) gt 0">
                <xsl:attribute name="nameIdentifierScheme">
                    <xsl:text>ORCID</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="schemeURI">
                    <xsl:text>http://orcid.org/</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="nameIdentifier">
                    <xsl:value-of select="$orcid"/>
                </xsl:attribute>
            </xsl:if>
            <xsl:value-of select="concat(normalize-space(./(*[local-name()='name'], *[local-name()='name-alternatives']/*[local-name()='name'])/*[local-name()='surname']), ', ', normalize-space(./(*[local-name()='name'], *[local-name()='name-alternatives']/*[local-name()='name'])/*[local-name()='given-names']))"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
|
||||
|
||||
<!-- Template rule for elements whose local name is 'header': shallow-copies the element,
     applies templates to its child nodes and attributes, then appends a
     dr:dateOfTransformation element containing $transDate (a stylesheet parameter that
     defaults to current-dateTime()). -->
<xsl:template match="//*[local-name() = 'header']">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*"/>
|
||||
<xsl:element name="dr:dateOfTransformation">
|
||||
<xsl:value-of select="$transDate"/>
|
||||
</xsl:element>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Identity rule: shallow-copies any node or attribute and recurses into its child nodes
     and attributes. In this stylesheet it is reached through the header rule, which applies
     templates to the header's children and attributes. -->
<xsl:template match="node()|@*">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*"/>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
|
@ -0,0 +1,493 @@
|
|||
<!-- from PROD 2021-06-14 -->
|
||||
<!-- Namespace bindings for the whole stylesheet.
     xlink is bound here because XPath expressions in this stylesheet select @xlink:href
     (on self-uri and license elements); a prefix used in an XPath expression without an
     in-scope declaration is a static error and the stylesheet would not compile. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
                xmlns:vocabulary="http://eu/dnetlib/transform/clean"
                xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
                xmlns:oaf="http://namespace.openaire.eu/oaf"
                xmlns:datacite="http://datacite.org/schema/kernel-4"
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:dr="http://www.driver-repository.eu/namespace/dr"
                xmlns:dc="http://purl.org/dc/elements/1.1/"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xmlns:xlink="http://www.w3.org/1999/xlink"
                exclude-result-prefixes="xsl vocabulary dateCleaner"
                version="2.0">
|
||||
|
||||
<!--
|
||||
<xsl:param name="varHostedById" select="'opendoar____::908'"/>
|
||||
<xsl:param name="varHostedByName" select="'Europe PubMed Central'"/>
|
||||
-->
|
||||
|
||||
<!-- Caller-supplied parameters (no defaults). varOfficialName and varDataSourceId are
     written to the name/id attributes of oaf:hostedBy and oaf:collectedFrom.
     varDsType is not referenced within this stylesheet. -->
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDsType" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<!-- Funder DOI suffixes: an award-group whose institution-id ends with one of these
     values is treated as FP7 (first two) or H2020 (third) when building oaf:projectid. -->
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'"/>
|
||||
<xsl:param name="varFP7OtherDOI" select="'10.13039/100011102'"/>
|
||||
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'"/>
|
||||
<!-- Prefixes prepended to the award-id to form the oaf:projectid value. -->
<xsl:param name="varFP7" select="'corda_______::'"/>
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'"/>
|
||||
<!-- epmcUrlPrefix, repoCode and index are not referenced within this stylesheet.
     repoCode defaults to the part of the header's recordIdentifier before the first ':'. -->
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
|
||||
|
||||
<xsl:param name="index" select="0"/>
|
||||
<!-- Written to dr:dateOfTransformation by the header template; defaults to the time of the transformation. -->
<xsl:param name="transDate" select="current-dateTime()"/>
|
||||
<!-- Zero-padded year / month / day taken from a pub-date with @pub-type='epub' or with
     @date-type='pub' and @publication-format='electronic'.
     These three variables are not referenced within this stylesheet.
     NOTE(review): the root template filters pub-date on @publication-format='epub',
     not 'electronic'; confirm which value the source records actually use. -->
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
|
||||
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
|
||||
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
|
||||
|
||||
<!-- Named template "terminate": aborts the transformation (xsl:message with terminate="yes").
     The root template calls it when the record has no non-empty article-title under article-meta. -->
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
<metadata>
|
||||
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
|
||||
<xsl:call-template name="terminate"/>
|
||||
</xsl:if>
|
||||
<!-- in journal.fi xml:lang of translated titles is not within the trans-title element but within the surrounding trans-title-group element (which just contains 1 trans-title element) -->
|
||||
<!--
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()=('article-title', 'trans-title-group')][string-length(normalize-space(.))> 0]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:title'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="title">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name()='title-group']//*[local-name()=('article-title', 'trans-title', 'subtitle', 'trans-subtitle')]"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="authors">
|
||||
<!--
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))]"/>
|
||||
-->
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group'][@content-type='author']/*[local-name() = 'contrib']"/>
|
||||
</xsl:call-template>
|
||||
<!-- <xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()=('abstract', 'trans-abstract')]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:description'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group']//*[local-name()='kwd']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:source'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:element name="dc:language">
|
||||
<xsl:value-of select="//*[local-name()='metadata']//*[local-name()='article']/@xml:lang" />
|
||||
</xsl:element>
|
||||
<xsl:element name="dc:identifier">
|
||||
<xsl:value-of select="//*[local-name()='article-meta']/*[local-name()='self-uri'][contains(./@xlink:href, '/view/')]/@xlink:href" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:dateAccepted">
|
||||
<!--
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])" />
|
||||
|
||||
<xsl:value-of select="TransformationFunction:Convert($tf, //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub'], 'DateISO8601', 'yyyy-MM-dd', 'min()')" />
|
||||
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/replace(concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day']), '-(\d)([-$])', '-0$1$2')" />
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
|
||||
concat(./*[local-name()='year'], '-',
|
||||
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']), 2), '-',
|
||||
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']), 2))" />
|
||||
-->
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
|
||||
concat(./*[local-name()='year'], '-',
|
||||
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']) idiv 2 + 1, 2), '-',
|
||||
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']) idiv 2 +1, 2))" />
|
||||
|
||||
</xsl:element>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<xsl:for-each select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub']">
|
||||
<xsl:choose>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2] and ./*[local-name()='day' and string-length(normalize-space(.)) = 2]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'])"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="./*[local-name()='year']"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()='meta-value'], //*[local-name()='permissions']/*[local-name()='copyright-statement']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='permissions']/*[local-name()='license']/@xlink:href"/>
|
||||
<xsl:with-param name="targetElement" select="'oaf:license'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="identifiers">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri'][not(./@content-type = 'application/pdf')]/@xlink:href">
|
||||
<oaf:identifier>
|
||||
<xsl:attribute name="identifierType">
|
||||
<xsl:text>landingPage</xsl:text>
|
||||
</xsl:attribute>
|
||||
<xsl:value-of select="."/>
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri' and ./@content-type='application/pdf' and //oaf:datasourceprefix = ('ambientesust', 'qualityinedu')]/@xlink:href/replace(., '/view/', '/download/')">
|
||||
<oaf:fulltext>
|
||||
<xsl:value-of select="."/>
|
||||
</oaf:fulltext>
|
||||
</xsl:for-each>
|
||||
|
||||
<!-- EC project links.
     For every award-group whose institution-id ends with a known FP7 / H2020 funder DOI,
     emit one oaf:projectid per award-id that is exactly six digits after whitespace
     normalisation. Iterating over the award-id elements themselves (instead of the group)
     keeps concat() from receiving several items when a group carries more than one
     award-id, and the emitted value is the normalised one that was actually tested. -->
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI) or ends-with(., $varFP7OtherDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '^\d{6}$')]">
    <oaf:projectid>
        <xsl:value-of select="concat($varFP7, normalize-space(.))"/>
    </oaf:projectid>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '^\d{6}$')]">
    <oaf:projectid>
        <xsl:value-of select="concat($varH2020, normalize-space(.))"/>
    </oaf:projectid>
</xsl:for-each>
|
||||
|
||||
<!-- -->
|
||||
<xsl:variable name='varRights' select="distinct-values((for $i in (
|
||||
//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href,
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
|
||||
and not( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
|
||||
and not( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())])]/'open',
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
|
||||
and (( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
|
||||
or ( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())]))]/'embargo')
|
||||
return vocabulary:clean( normalize-space($i), 'dnet:access_modes') "
|
||||
/>
|
||||
|
||||
<!--
|
||||
and not((xs:date( max( (start_date, '0001-01-01') ) ) gt current-date()))
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read' and and not((xs:date( max( (./@start_date, '0001-01-01') ) ) gt current-date()))]/'open'
|
||||
-->
|
||||
|
||||
<oaf:accessrights>
|
||||
<xsl:choose>
|
||||
<xsl:when test="$varRights[. = 'EMBARGO']">
|
||||
<xsl:value-of select="'EMBARGO'"/>
|
||||
</xsl:when>
|
||||
<xsl:when test="$varRights[. != 'UNKNOWN']">
|
||||
<xsl:value-of select="$varRights[. != 'UNKNOWN'][1]"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="$varRights[1]"/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:accessrights>
|
||||
|
||||
<!--
|
||||
<oaf:accessrights>
|
||||
<xsl:value-of select="$varRights[1]"/>
|
||||
</oaf:accessrights>
|
||||
|
||||
<xsl:element name="oaf:accessrights">
|
||||
<xsl:value-of select="(//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href)/TransformationFunction:convertString($tf, ., 'AccessRights')" />
|
||||
</xsl:element>
|
||||
-->
|
||||
|
||||
<!--
|
||||
<xsl:element name="dr:CobjCategory">
|
||||
<xsl:variable name='varCobjCategory' select="TransformationFunction:convertString($tf, //*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()='meta-value'], 'TextTypologies')" />
|
||||
<xsl:variable name='varSuperType' select="TransformationFunction:convertString($tf, $varCobjCategory, 'SuperTypes')" />
|
||||
<xsl:attribute name="type" select="$varSuperType"/>
|
||||
<xsl:value-of select="$varCobjCategory" />
|
||||
</xsl:element>
|
||||
|
||||
<xsl:variable name='varCobjCatLst' select="for $i in (
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')" />
|
||||
-->
|
||||
|
||||
<xsl:variable name='varTypLst' select="distinct-values((//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type))"/>
|
||||
<!-- Distinct resource-type terms obtained by cleaning every collected type string
     ($varTypLst) against the publication-resource vocabulary.
     The vocabulary id was previously written with a doubled 'dnet:' prefix, unlike every
     other vocabulary id used in this stylesheet (dnet:access_modes, dnet:result_typologies,
     dnet:languages, dnet:review_levels). -->
<xsl:variable name='varCobjCatLst' select="distinct-values((for $i in $varTypLst
    return vocabulary:clean( normalize-space($i), 'dnet:publication_resource')))" />
|
||||
<xsl:variable name='varCobjSupLst' select="for $i in $varCobjCatLst
|
||||
return concat($i, '###', vocabulary:clean( normalize-space($i), 'dnet:result_typologies'))" />
|
||||
<dr:CobjCategory>
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other')]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other')][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0000'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0000'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:attribute name="type" select="'other'"/>
|
||||
<xsl:value-of select="'0000'" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</dr:CobjCategory>
|
||||
|
||||
<!--
|
||||
<xsl:for-each select="$varCobjSupLst">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
-->
|
||||
|
||||
<xsl:for-each select="$varTypLst">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
|
||||
<!--
|
||||
<xsl:for-each select="(//*[local-name()='article']/@article-type, //*[local-name() = 'custom-meta' and ./@specific-use = 'resource-type']/*[local-name() = ('meta-value', 'meta-name')])">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
-->
|
||||
|
||||
<oaf:language>
|
||||
<xsl:value-of select="vocabulary:clean( //*[local-name()='metadata']//*[local-name()='article']/@xml:lang, 'dnet:languages')" />
|
||||
</oaf:language>
|
||||
|
||||
<!-- review status -->
|
||||
<!-- ToDo:
|
||||
review status
|
||||
~ ask Journal.fi to put it elsewhere
|
||||
~ evaluate article-version (no example found yet)
|
||||
subject/kwd:
|
||||
~ handle thesauri (no example found yet)
|
||||
relations:
|
||||
~ handle fn (no example found yet)
|
||||
-->
|
||||
<!--
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in (
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
-->
|
||||
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in ($varTypLst)
|
||||
return vocabulary:clean( normalize-space($i), 'dnet:review_levels')"/>
|
||||
<xsl:variable name="varRefereedDescp" select="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]/'0001'"/>
|
||||
<xsl:variable name="varRefereedSubjt" select="//*[local-name() = 'article-categories' and contains(//dri:recordIdentifier, 'oai:journal.fi')]/*[local-name() = 'subj-group' and ./@subj-group-type='heading']/*[local-name() = 'subject' and . = 'Peer reviewed articles']/'0001'"/>
|
||||
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedDescp, $varRefereedSubjt)"/>
|
||||
<!--
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="$varRefereedDescp"/>
|
||||
</oaf:refereed>
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="$varRefereed"/>
|
||||
</oaf:refereed>
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="count($varRefereed[. = '0001']) > 0"/>
|
||||
</oaf:refereed>
|
||||
-->
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereed[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereed[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
|
||||
<xsl:call-template name="journal">
|
||||
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']"/>
|
||||
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']"/>
|
||||
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']"/>
|
||||
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']"/>
|
||||
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']"/>
|
||||
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']"/>
|
||||
</xsl:call-template>
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
|
||||
</xsl:template>
|
||||
|
||||
<!--
  Named template "allElements": generic copier.
  Param sourceElement: the nodes to convert.
  Param targetElement: qualified name of the element to create for each node.
  Each created element holds the whitespace-normalised string value of its source node.
  For dc:title, dc:description and dc:subject an xml:lang attribute is added, taken from
  the node itself or, failing that, from its parent.
-->
<xsl:template name="allElements">
    <xsl:param name="sourceElement"/>
    <xsl:param name="targetElement"/>
    <xsl:for-each select="$sourceElement">
        <!-- Own xml:lang wins over the parent's. -->
        <xsl:variable name="lang" select="(./@xml:lang, ../@xml:lang)[1]"/>
        <xsl:element name="{$targetElement}">
            <xsl:if test="$targetElement = ('dc:title', 'dc:description', 'dc:subject') and exists($lang)">
                <xsl:attribute name="xml:lang" select="$lang"/>
            </xsl:if>
            <xsl:value-of select="normalize-space(.)"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!--
  Named template "title": emits one dc:title per node in $sourceElement.
  The text is the node's normalised value followed by the normalised values of any
  following-sibling subtitle / trans-subtitle elements, joined with ': '.
  xml:lang is taken from the node itself or, failing that, from its parent.
  NOTE(review): the root template also passes subtitle / trans-subtitle elements as
  source nodes, so a subtitle appears both appended to its title and as a dc:title of
  its own; confirm that this is intended.
-->
<xsl:template name="title">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <xsl:variable name="lang" select="(./@xml:lang, ../@xml:lang)[1]"/>
        <xsl:variable name="parts" select="(., ./following-sibling::*[local-name() = ('subtitle', 'trans-subtitle')])/normalize-space(.)"/>
        <xsl:element name="dc:title">
            <xsl:if test="exists($lang)">
                <xsl:attribute name="xml:lang" select="$lang"/>
            </xsl:if>
            <xsl:value-of select="string-join($parts, ': ')"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!--
  Named template "journal": builds a single oaf:journal element.
  Params issn, eissn, vol, issue, sp, ep become the attributes issn, eissn, vol, iss,
  sp and ep; journalTitle becomes the element text. Every value is whitespace-normalised.
-->
<xsl:template name="journal">
    <xsl:param name="journalTitle"/>
    <xsl:param name="issn"/>
    <xsl:param name="eissn"/>
    <xsl:param name="vol"/>
    <xsl:param name="issue"/>
    <xsl:param name="sp"/>
    <xsl:param name="ep"/>
    <xsl:element name="oaf:journal">
        <xsl:attribute name="issn" select="normalize-space($issn)"/>
        <xsl:attribute name="eissn" select="normalize-space($eissn)"/>
        <xsl:attribute name="vol" select="normalize-space($vol)"/>
        <!-- The parameter is called "issue" but the attribute name is "iss". -->
        <xsl:attribute name="iss" select="normalize-space($issue)"/>
        <xsl:attribute name="sp" select="normalize-space($sp)"/>
        <xsl:attribute name="ep" select="normalize-space($ep)"/>
        <xsl:value-of select="normalize-space($journalTitle)"/>
    </xsl:element>
</xsl:template>
|
||||
|
||||
|
||||
<!--
  Named template "identifiers": emits an oaf:identifier with identifierType="doi" when
  $sourceElement contains a node with @pub-id-type='doi' whose string value is not empty.
  NOTE(review): string-length() accepts at most one item, so passing more than one DOI
  node would presumably raise a type error; confirm against the records being processed.
-->
<xsl:template name="identifiers">
    <xsl:param name="sourceElement"/>
    <xsl:variable name="doi" select="$sourceElement[@pub-id-type='doi']"/>
    <xsl:if test="string-length($doi) gt 0">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType" select="'doi'"/>
            <xsl:value-of select="$doi"/>
        </xsl:element>
    </xsl:if>
</xsl:template>
|
||||
|
||||
|
||||
<!--
  Named template "authors": emits one dc:creator element per node in $sourceElement.
  Param sourceElement: the contributor nodes to convert (each is read for contrib-id
  and name children, matched by local name only).
  Output per contributor:
    * when an ORCID contrib-id is present, the attributes nameIdentifierScheme,
      schemeURI and nameIdentifier;
    * text content "surname, given-names" (each part whitespace-normalised).
-->
<xsl:template name="authors">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <!-- First ORCID contrib-id only, so a repeated identifier cannot break the string functions below. -->
        <xsl:variable name="orcid" select="(./*[local-name()='contrib-id'][@contrib-id-type='orcid'])[1]"/>
        <xsl:element name="dc:creator">
            <xsl:if test="$orcid">
                <xsl:attribute name="nameIdentifierScheme">
                    <xsl:text>ORCID</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="schemeURI">
                    <xsl:text>http://orcid.org/</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="nameIdentifier">
                    <!-- Strip an optional http:// or https:// orcid.org prefix; a bare iD is kept as is.
                         (The previous substring-after() call returned an empty string for anything
                         that did not start with the plain http:// form.) -->
                    <xsl:value-of select="replace(normalize-space($orcid), '^https?://orcid\.org/', '', 'i')"/>
                </xsl:attribute>
            </xsl:if>
            <xsl:value-of select="concat(normalize-space(./*[local-name()='name']/*[local-name()='surname']), ', ', normalize-space(./*[local-name()='name']/*[local-name()='given-names']))"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
|
||||
|
||||
<!-- Matches any element whose local name is 'header' (the namespace is ignored).
     The element is shallow-copied, its attributes and child nodes are processed by
     the stylesheet's templates, and a dr:dateOfTransformation element holding the
     value of the transDate parameter is appended after them. -->
<xsl:template match="//*[local-name() = 'header']">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*"/>
|
||||
<xsl:element name="dr:dateOfTransformation">
|
||||
<xsl:value-of select="$transDate"/>
|
||||
</xsl:element>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Identity rule: shallow-copies the matched node or attribute and recurses into its
     attributes and children, so anything reached through xsl:apply-templates that has
     no more specific template is reproduced unchanged. -->
<xsl:template match="node()|@*">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*"/>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
|
@ -0,0 +1,373 @@
|
|||
<!-- for adaptation , 2021-06-14 PROD -->
|
||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
|
||||
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
|
||||
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-4"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
exclude-result-prefixes="xsl vocabulary dateCleaner"
|
||||
version="2.0">
|
||||
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDsType" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:output indent="yes" omit-xml-declaration="yes" />
|
||||
<xsl:param name="varHostedById" select="'opendoar____::908'" />
|
||||
<xsl:param name="varHostedByName" select="'Europe PubMed Central'" />
|
||||
|
||||
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'" />
|
||||
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'" />
|
||||
<xsl:param name="varFP7" select="'corda_______::'" />
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'" />
|
||||
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
|
||||
<xsl:param name="index" select="0" />
|
||||
<xsl:param name="transDate" select="current-dateTime()" />
|
||||
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
|
||||
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
|
||||
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
|
||||
<!-- Aborts the transformation: xsl:message with terminate="yes" reports the
     text below and stops processing the current record. -->
<xsl:template name="terminate">
  <xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
<metadata>
|
||||
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
|
||||
<xsl:call-template name="terminate" />
|
||||
</xsl:if>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0]" />
|
||||
<xsl:with-param name="targetElement" select="'dc:title'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="authors">
|
||||
<!--
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'contrib'][@contrib-type='author']"/>
|
||||
-->
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))][./*[local-name()='name'] or ./*[local-name()='name-alternatives']/*[local-name()='name']][string-length(.//*[local-name()='surname']) + string-length(.//*[local-name()='given-names']) > 0]" />
|
||||
</xsl:call-template> <!-- <xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()='abstract']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:description'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group' and not(lower-case(@kwd-group-type)=('mesh', 'ocis'))]//*[local-name()='kwd']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'" />
|
||||
</xsl:call-template>
|
||||
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='mesh' and ./*[local-name()='kwd']]">
|
||||
<xsl:for-each select="./*[local-name()='kwd']">
|
||||
<dc:subject>
|
||||
<xsl:attribute name="subjectScheme" select="'mesh'" />
|
||||
<xsl:attribute name="schemeURI" select="'http://www.nlm.nih.gov/mesh/'" />
|
||||
<xsl:attribute name="valueURI" select="''" />
|
||||
<xsl:value-of select="./concat('mesh:', replace(., 'mesh (.*)$', '$1'))" />
|
||||
</dc:subject>
|
||||
</xsl:for-each>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='ocis' and ./*[local-name()='kwd']]">
|
||||
<xsl:for-each select="./*[local-name()='kwd']">
|
||||
<dc:subject>
|
||||
<xsl:attribute name="subjectScheme" select="'ocis'" />
|
||||
<xsl:attribute name="schemeURI" select="''" />
|
||||
<xsl:attribute name="valueURI" select="''" />
|
||||
<xsl:value-of select="./concat('ocis:', .)" />
|
||||
</dc:subject>
|
||||
</xsl:for-each>
|
||||
</xsl:for-each>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:source'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/(*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version'], *[local-name() = 'article-version'])/concat('article-version (', @article-version-type, ') ', .)" />
|
||||
<xsl:with-param name="targetElement" select="'dc:source'" />
|
||||
</xsl:call-template>
|
||||
<xsl:element name="dc:language">
|
||||
<xsl:text>eng</xsl:text>
|
||||
</xsl:element>
|
||||
<xsl:element name="dc:identifier">
|
||||
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:fulltext">
|
||||
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:dateAccepted">
|
||||
<xsl:choose>
|
||||
<xsl:when test="//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub'] or //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']">
|
||||
<xsl:if test="string(number($month)) eq 'NaN'">
|
||||
<xsl:value-of select="concat($year, '-', '01', '-', '01')" />
|
||||
</xsl:if>
|
||||
<xsl:if test="string(number($month)) != 'NaN'">
|
||||
<xsl:value-of select="concat($year, '-', $month, '-', '01')" />
|
||||
</xsl:if>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="concat(//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='ppub']/*[local-name()='year'], '-01-01')" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:element>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='copyright-statement'])" />
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='license'])" />
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="identifiers">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']" />
|
||||
</xsl:call-template>
|
||||
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI)]]">
|
||||
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varFP7, ./*[local-name()='award-id'])" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]">
|
||||
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varH2020, ./*[local-name()='award-id'])" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
<xsl:element name="oaf:accessrights">
|
||||
<xsl:text>OPEN</xsl:text>
|
||||
</xsl:element>
|
||||
<xsl:element name="dr:CobjCategory">
|
||||
<xsl:attribute name="type" select="'publication'" />
|
||||
<xsl:text>0001</xsl:text>
|
||||
</xsl:element>
|
||||
<dc:type>
|
||||
<xsl:value-of select="//*[local-name() = 'article']/@article-type" />
|
||||
</dc:type>
|
||||
|
||||
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in (//*[local-name() = 'resource']/*[local-name() = ('resourceType', 'version')]/(., @uri))
|
||||
return vocabulary:clean( normalize-space($i), 'dnet:review_levels')"/>
|
||||
|
||||
<!-- <xsl:variable name="varRefereedConvt" select="for $i in distinct-values((//*[local-name() = 'article']/@article-type, //oai:setSpec))
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')" />
|
||||
-->
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereedConvt[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'article-id'][@pub-id-type='doi'][matches(., '^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = ('fn-group', 'notes')][
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*review\s*information.*') or
|
||||
matches(lower-case(.), '.*the\s*peer[\.\-_/\s\(\)]*review\s*history\s*for\s*this\s*article\s*is\s*available\s*at .*') or
|
||||
matches(lower-case(.), '.*provenance\s*and\s*peer[\.\-_/\s\(\)]*review.*') or
|
||||
matches(lower-case(.), '.*externally\s*peer[\.\-_/\s\(\)]*reviewed.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*reviewed\s*by.*') or
|
||||
matches(lower-case(.), '.*refereed\s*anonymously.*') or
|
||||
matches(lower-case(.), '.*peer\s*reviewer\s*reports\s*are\s*available.*') or
|
||||
matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\].*') or
|
||||
matches(lower-case(.), '.*\[.*referees\s*:.*\].*') or
|
||||
matches(lower-case(.), '^\s*plagiarism[\s\-\._]check.*') or
|
||||
matches(lower-case(.), '^\s*peer[\s\-\._]*review.*') or
|
||||
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)reviewer.*') or
|
||||
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)review\s*reports?.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = ('article-meta', 'app', 'app-group')]/*[local-name() = 'supplementary-material']/*[local-name() = 'media'][
|
||||
matches(lower-case(.), '.*peer\s*review\s*file.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']
|
||||
[./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or
|
||||
./*[local-name() = 'contrib'][./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or ./*[local-name() = 'role' and lower-case(.) = ('reviewer', 'solicited external reviewer')] or ./@contrib-type/lower-case(.) = 'reviewer']]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereedConvt[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = ('related-article')][./@related-article-type = ('peer-reviewed-article', 'reviewed-article')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta'][./*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version' and . = 'preprint'] or ./*[local-name() = 'article-version' and . = 'preprint']]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
<xsl:call-template name="journal">
|
||||
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']" />
|
||||
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']" />
|
||||
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']" />
|
||||
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']" />
|
||||
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']" />
|
||||
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']" />
|
||||
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']" />
|
||||
</xsl:call-template>
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varHostedByName" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varHostedById" />
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId" />
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
<xsl:for-each select="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = 'fn-group']/*[local-name() = 'fn'][matches(lower-case(.), 'country(/territory)? of origin:?\s*[A-Za-z\-]+')]">
|
||||
<oaf:country>
|
||||
<!--
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, replace(lower-case(.), '^(.|\s)*country(/territory)? of origin:?\s+([A-Za-z\-,\(\)]+(\s+[A-Za-z\-,\(\)]+)*)(.|\s)*$', '$3'), 'Countries')"/>
|
||||
-->
|
||||
<!-- ACz, 2021-06-14
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(substring(substring-after(lower-case(.), 'of origin'), 2)), 'Countries')" />
|
||||
-->
|
||||
<xsl:value-of select="vocabulary:clean( normalize-space(substring(substring-after(lower-case(.), 'of origin'), 2)), 'dnet:countries')"/>
|
||||
</oaf:country>
|
||||
</xsl:for-each>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
</xsl:template>
|
||||
<!-- Generic mapper: for every item of $sourceElement creates one element named
     by $targetElement whose content is the whitespace-normalized string value
     of that item. -->
<xsl:template name="allElements">
  <xsl:param name="sourceElement" />
  <xsl:param name="targetElement" />
  <xsl:for-each select="$sourceElement">
    <xsl:variable name="cleanValue" select="normalize-space(.)" />
    <xsl:element name="{$targetElement}">
      <xsl:value-of select="$cleanValue" />
    </xsl:element>
  </xsl:for-each>
</xsl:template>
|
||||
<!-- Builds the oaf:journal element. Each parameter is whitespace-normalized
     and written to the attribute of the matching name (issn, eissn, vol, iss,
     sp, ep); the normalized journal title becomes the element content. -->
<xsl:template name="journal">
  <xsl:param name="journalTitle" />
  <xsl:param name="issn" />
  <xsl:param name="eissn" />
  <xsl:param name="vol" />
  <xsl:param name="issue" />
  <xsl:param name="sp" />
  <xsl:param name="ep" />
  <xsl:element name="oaf:journal">
    <xsl:attribute name="issn" select="normalize-space($issn)" />
    <xsl:attribute name="eissn" select="normalize-space($eissn)" />
    <xsl:attribute name="vol" select="normalize-space($vol)" />
    <xsl:attribute name="iss" select="normalize-space($issue)" />
    <xsl:attribute name="sp" select="normalize-space($sp)" />
    <xsl:attribute name="ep" select="normalize-space($ep)" />
    <xsl:value-of select="normalize-space($journalTitle)" />
  </xsl:element>
</xsl:template>
|
||||
<!-- Emits oaf:identifier elements for the doi, pmcid (identifierType "pmc")
     and pmid entries found in $sourceElement. Each identifier is written only
     when at least one entry of that type has non-blank content, so records
     lacking an identifier no longer get an empty oaf:identifier element.
     When an identifier is present the output is unchanged. -->
<xsl:template name="identifiers">
  <xsl:param name="sourceElement" />
  <xsl:if test="$sourceElement[@pub-id-type='doi'][normalize-space(.)]">
    <xsl:element name="oaf:identifier">
      <xsl:attribute name="identifierType">
        <xsl:text>doi</xsl:text>
      </xsl:attribute>
      <xsl:value-of select="$sourceElement[@pub-id-type='doi']" />
    </xsl:element>
  </xsl:if>
  <xsl:if test="$sourceElement[@pub-id-type='pmcid'][normalize-space(.)]">
    <xsl:element name="oaf:identifier">
      <xsl:attribute name="identifierType">
        <xsl:text>pmc</xsl:text>
      </xsl:attribute>
      <xsl:value-of select="$sourceElement[@pub-id-type='pmcid']" />
    </xsl:element>
  </xsl:if>
  <xsl:if test="$sourceElement[@pub-id-type='pmid'][normalize-space(.)]">
    <xsl:element name="oaf:identifier">
      <xsl:attribute name="identifierType">
        <xsl:text>pmid</xsl:text>
      </xsl:attribute>
      <xsl:value-of select="$sourceElement[@pub-id-type='pmid']" />
    </xsl:element>
  </xsl:if>
</xsl:template>
|
||||
<!-- Emits one dc:creator per contributor in $sourceElement, formatted as
     "surname, given-names".
     - The name is taken from the first name element found either directly
       under the contributor or under its name-alternatives child; picking a
       single element keeps normalize-space() from receiving a multi-item
       sequence when several alternative names are present.
     - When the contributor has a contrib-id of type 'orcid', the ORCID
       attributes are added; the iD is obtained by stripping an optional
       http:// or https:// orcid.org prefix, so https URLs and bare iDs no
       longer yield an empty nameIdentifier. -->
<xsl:template name="authors">
  <xsl:param name="sourceElement" />
  <xsl:for-each select="$sourceElement">
    <xsl:variable name="orcid" select="(./*[local-name()='contrib-id'][@contrib-id-type='orcid'])[1]" />
    <xsl:variable name="personName" select="(./(*[local-name()='name'], *[local-name()='name-alternatives']/*[local-name()='name']))[1]" />
    <xsl:element name="dc:creator">
      <xsl:if test="$orcid">
        <xsl:attribute name="nameIdentifierScheme">
          <xsl:text>ORCID</xsl:text>
        </xsl:attribute>
        <xsl:attribute name="schemeURI">
          <xsl:text>http://orcid.org/</xsl:text>
        </xsl:attribute>
        <xsl:attribute name="nameIdentifier">
          <xsl:value-of select="replace(normalize-space($orcid), '^https?://orcid\.org/', '')" />
        </xsl:attribute>
      </xsl:if>
      <xsl:value-of select="concat(normalize-space($personName/*[local-name()='surname'][1]), ', ', normalize-space($personName/*[local-name()='given-names'][1]))" />
    </xsl:element>
  </xsl:for-each>
</xsl:template>
|
||||
<!-- Copies every element whose local name is 'header' (attributes and
     children are processed by the other templates) and appends a
     dr:dateOfTransformation child carrying the value of $transDate. -->
<xsl:template match="//*[local-name() = 'header']">
  <xsl:copy>
    <xsl:apply-templates select="@* | node()" />
    <xsl:element name="dr:dateOfTransformation">
      <xsl:value-of select="$transDate" />
    </xsl:element>
  </xsl:copy>
</xsl:template>
|
||||
<!-- Identity transform: copies any node or attribute and recurses into its
     attributes and children. -->
<xsl:template match="node()|@*">
  <xsl:copy>
    <xsl:apply-templates select="@* | node()" />
  </xsl:copy>
</xsl:template>
|
||||
</xsl:stylesheet>
|
|
@ -80,6 +80,7 @@ public class GenerateStatsJob {
|
|||
.map(
|
||||
(MapFunction<Tuple2<String, DatasourceStats>, DatasourceStats>) t -> t._2,
|
||||
Encoders.bean(DatasourceStats.class))
|
||||
.coalesce(1)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.jdbc(dbUrl, "oa_datasource_stats_temp", connectionProperties);
|
||||
|
|
|
@ -38,6 +38,9 @@ object DoiBoostMappingUtil {
|
|||
val OPENAIRE_PREFIX = "openaire____"
|
||||
val SEPARATOR = "::"
|
||||
|
||||
// Matches the DOI prefix "10." at the very start of the string or right after a slash.
// Both dots are escaped so that only a literal "10." qualifies; an unescaped dot would
// also accept "/10" followed by any character and turn it into "10.".
val DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)"
|
||||
val DOI_PREFIX = "10."
|
||||
|
||||
val invalidName = List(",", "none none", "none, none", "none &na;", "(:null)", "test test test", "test test", "test", "&na; &na;")
|
||||
|
||||
def toActionSet(item:Oaf) :(String, String) = {
|
||||
|
@ -352,5 +355,28 @@ object DoiBoostMappingUtil {
|
|||
|
||||
}
|
||||
|
||||
/** Returns true when `x` is null or is empty once trimmed. */
def isEmpty(x: String): Boolean = Option(x).forall(_.trim.isEmpty)
|
||||
|
||||
/** Normalizes a DOI string.
  *
  * All whitespace is removed, the text is lower-cased, the first match of
  * DOI_PREFIX_REGEX is replaced with DOI_PREFIX, and everything before the
  * first occurrence of DOI_PREFIX is dropped.
  *
  * Lower-casing uses Locale.ROOT so the result does not depend on the JVM's
  * default locale (e.g. "I" must become "i" everywhere).
  *
  * @param input the raw DOI, possibly embedded in a resolver URL; may be null
  * @return the normalized DOI starting with DOI_PREFIX, or null when the input
  *         is null, blank, or contains no DOI_PREFIX
  */
def normalizeDoi(input: String): String = {
  if (input == null)
    return null

  val replaced = input
    .replaceAll("(?:\\n|\\r|\\t|\\s)", "")
    .toLowerCase(java.util.Locale.ROOT)
    .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)

  if (isEmpty(replaced))
    return null

  // Look the prefix up once; a substring taken from this index necessarily
  // starts with the prefix, so no further startsWith check is needed.
  val prefixStart = replaced.indexOf(DOI_PREFIX)
  if (prefixStart < 0) null else replaced.substring(prefixStart)
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@ import scala.collection.mutable
|
|||
import scala.util.matching.Regex
|
||||
import java.util
|
||||
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
|
||||
case class CrossrefDT(doi: String, json:String, timestamp: Long) {}
|
||||
|
||||
case class mappingAffiliation(name: String) {}
|
||||
|
@ -87,7 +89,7 @@ case object Crossref2Oaf {
|
|||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
|
||||
//MAPPING Crossref DOI into PID
|
||||
val doi: String = (json \ "DOI").extract[String]
|
||||
val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||
result.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
|
||||
//MAPPING Crossref DOI into OriginalId
|
||||
|
@ -99,6 +101,7 @@ case object Crossref2Oaf {
|
|||
val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava)
|
||||
result.setOriginalId(originalIds)
|
||||
|
||||
|
||||
// Add DataInfo
|
||||
result.setDataInfo(generateDataInfo())
|
||||
|
||||
|
@ -140,7 +143,6 @@ case object Crossref2Oaf {
|
|||
result.setDateofacceptance(asField(issuedDate))
|
||||
}
|
||||
else {
|
||||
// TODO: take the oldest date between publishedPrint and publishedOnline
|
||||
result.setDateofacceptance(asField(createdDate.getValue))
|
||||
}
|
||||
result.setRelevantdate(List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate).filter(p => p != null).asJava)
|
||||
|
@ -408,14 +410,6 @@ case object Crossref2Oaf {
|
|||
}
|
||||
|
||||
|
||||
/** Parses `input` as JSON, reads its "items" field as an array and returns
  * each array element rendered as a compact JSON string.
  */
def extractDump(input: String): List[String] = {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  val items = (parse(input) \ "items").extract[JArray]
  for (item <- items.arr) yield compact(render(item))
}
|
||||
|
||||
def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
val containerTitles = for {JString(ct) <- json \ "container-title"} yield ct
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package eu.dnetlib.doiboost.crossref
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.hadoop.io.{IntWritable, Text}
|
||||
import org.apache.spark.SparkConf
|
||||
|
@ -21,7 +22,7 @@ object CrossrefDataset {
|
|||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json: json4s.JValue = parse(input)
|
||||
val ts:Long = (json \ "indexed" \ "timestamp").extract[Long]
|
||||
val doi:String = (json \ "DOI").extract[String]
|
||||
val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||
CrossrefDT(doi, input, ts)
|
||||
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package eu.dnetlib.doiboost.crossref
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item
|
||||
import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass
|
||||
import org.apache.hadoop.io.{IntWritable, Text}
|
||||
|
@ -27,7 +28,7 @@ object GenerateCrossrefDataset {
|
|||
def crossrefElement(meta: String): CrossrefDT = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json: json4s.JValue = parse(meta)
|
||||
val doi:String = (json \ "DOI").extract[String]
|
||||
val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||
val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
|
||||
CrossrefDT(doi, meta, timestamp)
|
||||
|
||||
|
|
|
@ -196,8 +196,8 @@ case object ConversionUtil {
|
|||
val authors = inputParams._2
|
||||
|
||||
val pub = new Publication
|
||||
pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)
|
||||
pub.setPid(List(createSP(paper.Doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
pub.setOriginalId(List(paper.PaperId.toString, paper.Doi).asJava)
|
||||
|
||||
//IMPORTANT
|
||||
//The old method result.setId(generateIdentifier(result, doi))
|
||||
|
@ -258,11 +258,14 @@ case object ConversionUtil {
|
|||
val description = inputParams._2
|
||||
|
||||
val pub = new Publication
|
||||
pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)
|
||||
pub.setPid(List(createSP(paper.Doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
pub.setOriginalId(List(paper.PaperId.toString, paper.Doi).asJava)
|
||||
|
||||
//Set identifier as 50 | doiboost____::md5(DOI)
|
||||
pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase))
|
||||
//IMPORTANT
|
||||
//The old method result.setId(generateIdentifier(result, doi))
|
||||
//will be replaced using IdentifierFactory
|
||||
|
||||
pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub))
|
||||
|
||||
val mainTitles = createSP(paper.PaperTitle, "main title", ModelConstants.DNET_DATACITE_TITLE)
|
||||
val originalTitles = createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE)
|
||||
|
|
|
@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.mag
|
|||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.rdd.RDD
|
||||
|
@ -12,6 +13,23 @@ import org.slf4j.{Logger, LoggerFactory}
|
|||
import scala.collection.JavaConverters._
|
||||
|
||||
object SparkProcessMAG {
|
||||
|
||||
/** Reduces the MAG papers to one record per normalized DOI.
  *
  * Papers with a null Doi column are dropped, the rest are grouped by
  * DoiBoostMappingUtil.normalizeDoi(Doi), and each group is collapsed with
  * ConversionUtil.choiceLatestMagArtitcle. The surviving record is rebuilt
  * with its Doi replaced by the normalized value.
  */
def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = {
  val papersWithDoi = d.where(col("Doi").isNotNull)

  // One winner per normalized DOI, as decided by choiceLatestMagArtitcle.
  val onePerDoi = papersWithDoi
    .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING)
    .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2))
    .map(_._2)(Encoders.product[MagPapers])

  // Same record, with the DOI field stored in its normalized form.
  onePerDoi.map(mp => {
    new MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi),
      mp.DocType, mp.PaperTitle, mp.OriginalTitle,
      mp.BookTitle, mp.Year, mp.Date, mp.Publisher: String,
      mp.JournalId, mp.ConferenceSeriesId, mp.ConferenceInstanceId,
      mp.Volume, mp.Issue, mp.FirstPage, mp.LastPage,
      mp.ReferenceCount, mp.CitationCount, mp.EstimatedCitation,
      mp.OriginalVenue, mp.FamilyId, mp.CreatedDate)
  })(Encoders.product[MagPapers])
}
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
|
||||
val logger: Logger = LoggerFactory.getLogger(getClass)
|
||||
|
@ -33,17 +51,11 @@ object SparkProcessMAG {
|
|||
implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication]
|
||||
implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
|
||||
|
||||
logger.info("Phase 1) make uninque DOI in Papers:")
|
||||
logger.info("Phase 1) make uninue DOI in Papers:")
|
||||
val d: Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers]
|
||||
|
||||
// Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one
|
||||
val result: RDD[MagPapers] = d.where(col("Doi").isNotNull)
|
||||
.rdd
|
||||
.map{ p: MagPapers => Tuple2(p.Doi, p) }
|
||||
.reduceByKey((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2))
|
||||
.map(_._2)
|
||||
|
||||
val distinctPaper: Dataset[MagPapers] = spark.createDataset(result)
|
||||
val distinctPaper: Dataset[MagPapers] = getDistinctResults(d)
|
||||
|
||||
distinctPaper.write.mode(SaveMode.Overwrite).save(s"$workingPath/Papers_distinct")
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ object ORCIDToOAF {
|
|||
JField("type", JString(typeValue)) <- extIds
|
||||
JField("value", JString(value)) <- extIds
|
||||
if "doi".equalsIgnoreCase(typeValue)
|
||||
} yield (typeValue, value)
|
||||
} yield (typeValue, DoiBoostMappingUtil.normalizeDoi(value))
|
||||
if (doi.nonEmpty) {
|
||||
return doi.map(l =>OrcidWork(oid, l._2))
|
||||
}
|
||||
|
@ -102,7 +102,7 @@ object ORCIDToOAF {
|
|||
def convertTOOAF(input:ORCIDItem) :Publication = {
|
||||
val doi = input.doi
|
||||
val pub:Publication = new Publication
|
||||
pub.setPid(List(createSP(doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
pub.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
pub.setDataInfo(generateDataInfo())
|
||||
|
||||
pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub))
|
||||
|
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.uw
|
|||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
|
||||
import eu.dnetlib.dhp.schema.oaf.{AccessRight, Instance, OpenAccessRoute, Publication}
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
|
@ -53,7 +54,10 @@ object UnpayWallToOAF {
|
|||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json: json4s.JValue = parse(input)
|
||||
|
||||
val doi = (json \"doi").extract[String]
|
||||
val doi = DoiBoostMappingUtil.normalizeDoi((json \"doi").extract[String])
|
||||
|
||||
if(doi == null)
|
||||
return null
|
||||
|
||||
val is_oa = (json\ "is_oa").extract[Boolean]
|
||||
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
package eu.dnetlib.dhp.doiboost
|
||||
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
/**
 * Unit tests pinning the expected results of `DoiBoostMappingUtil.normalizeDoi`
 * for well-formed DOIs, malformed DOIs and DOIs wrapped in a resolver URL.
 */
class NormalizeDOITest {

  /** A well-formed, mixed-case DOI is expected back in lower case. */
  @Test
  def doiDSLowerCase(): Unit = {
    val rawDoi = "10.1042/BCJ20160876"
    val normalized = DoiBoostMappingUtil.normalizeDoi(rawDoi)

    assert(normalized.equals(rawDoi.toLowerCase()))
  }

  /** A value whose prefix is "0." instead of "10." is expected to yield null. */
  @Test
  def doiFiltered(): Unit = {
    val rawDoi = "0.1042/BCJ20160876"
    val normalized = DoiBoostMappingUtil.normalizeDoi(rawDoi)

    assert(normalized == null)
  }

  /** Same malformed value as above, wrapped in a doi.org URL: still expected to yield null. */
  @Test
  def doiFiltered2(): Unit = {
    val rawDoi = "https://doi.org/0.1042/BCJ20160876"
    val normalized = DoiBoostMappingUtil.normalizeDoi(rawDoi)

    assert(normalized == null)
  }

  /** The doi.org URL prefix is expected to be removed and the DOI lower-cased. */
  @Test
  def doiCleaned(): Unit = {
    val rawDoi = "https://doi.org/10.1042/BCJ20160876"
    val expected = "10.1042/BCJ20160876".toLowerCase()
    val normalized = DoiBoostMappingUtil.normalizeDoi(rawDoi)

    assert(normalized.equals(expected))
  }

  /** A blank inside the DOI is expected to be dropped, along with the URL prefix. */
  @Test
  def doiCleaned1(): Unit = {
    val rawDoi = "https://doi.org/10.1042/ BCJ20160876"
    val expected = "10.1042/BCJ20160876".toLowerCase()
    val normalized = DoiBoostMappingUtil.normalizeDoi(rawDoi)

    assert(normalized.equals(expected))
  }

}
|
|
@ -461,5 +461,37 @@ class CrossrefMappingTest {
|
|||
// })
|
||||
}
|
||||
|
||||
/**
 * Converts the funder template record (with one funder entry injected) and
 * expects the first Publication to carry exactly one pid, of class "doi",
 * whose value is the lower-cased DOI.
 */
@Test
def testNormalizeDOI(): Unit = {
  val funderLine: String = "\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}],"
  val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString
  // the template carries a "%s" placeholder where the funder fragment is injected
  val json = template.replace("%s", funderLine)

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  val publications = resultList.filter(p => p.isInstanceOf[Publication])
  val result: Result = publications.head.asInstanceOf[Publication]
  val expectedDoi = "10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase()

  for (pid <- result.getPid.asScala) {
    assertTrue(pid.getQualifier.getClassid.equals("doi"))
  }
  assertTrue(result.getPid.size() == 1)
  for (pid <- result.getPid.asScala) {
    assertTrue(pid.getValue.equals(expectedDoi))
  }
}
|
||||
|
||||
/**
 * Converts the `article.json` record as-is and expects the first Publication
 * to carry exactly one pid, of class "doi", whose value is the lower-cased DOI.
 */
@Test
def testNormalizeDOI2(): Unit = {
  val json = Source.fromInputStream(getClass.getResourceAsStream("article.json")).mkString

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  val publications = resultList.filter(p => p.isInstanceOf[Publication])
  val result: Result = publications.head.asInstanceOf[Publication]
  val expectedDoi = "10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase()

  for (pid <- result.getPid.asScala) {
    assertTrue(pid.getQualifier.getClassid.equals("doi"))
  }
  assertTrue(result.getPid.size() == 1)
  for (pid <- result.getPid.asScala) {
    assertTrue(pid.getValue.equals(expectedDoi))
  }
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ import java.sql.Timestamp
|
|||
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication
|
||||
import org.apache.htrace.fasterxml.jackson.databind.SerializationFeature
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
import org.apache.spark.api.java.function.MapFunction
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
||||
|
@ -62,6 +62,55 @@ class MAGMappingTest {
|
|||
logger.debug(description)
|
||||
|
||||
}
|
||||
/**
 * Loads `magPapers.json` into a `Dataset[MagPapers]`, runs
 * `SparkProcessMAG.getDistinctResults` on it and expects 10 records back,
 * each with a DOI that is already lower case.
 *
 * The Spark session is closed in a `finally` block so that a failing
 * assertion does not leave a running SparkContext behind for the tests
 * executed afterwards in the same JVM.
 */
@Test
def normalizeDoiTest(): Unit = {
  val conf = new SparkConf().setAppName("test").setMaster("local[2]")
  val sc = new SparkContext(conf)
  val spark = SparkSession.builder.config(sc.getConf).getOrCreate()

  try {
    val path = getClass.getResource("magPapers.json").getPath

    import org.apache.spark.sql.Encoders
    // schema is derived from the MagPapers case class instead of being inferred from the data
    val schema = Encoders.product[MagPapers].schema

    import spark.implicits._
    val magPapers: Dataset[MagPapers] = spark.read.option("multiline", true).schema(schema).json(path).as[MagPapers]
    val ret: Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers)

    assertTrue(ret.count == 10)
    ret.take(10).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase())))
  } finally {
    // always release the local Spark session, even when an assertion above fails
    spark.close()
  }
}
|
||||
|
||||
/**
 * Loads `duplicatedMagPapers.json` into a `Dataset[MagPapers]`, runs
 * `SparkProcessMAG.getDistinctResults` on it and expects 8 records back,
 * each with a DOI that is already lower case.
 *
 * The Spark session is closed in a `finally` block so that a failing
 * assertion does not leave a running SparkContext behind for the tests
 * executed afterwards in the same JVM.
 */
@Test
def normalizeDoiTest2(): Unit = {
  val conf = new SparkConf().setAppName("test").setMaster("local[2]")
  val sc = new SparkContext(conf)
  val spark = SparkSession.builder.config(sc.getConf).getOrCreate()

  try {
    val path = getClass.getResource("duplicatedMagPapers.json").getPath

    import org.apache.spark.sql.Encoders
    // schema is derived from the MagPapers case class instead of being inferred from the data
    val schema = Encoders.product[MagPapers].schema

    import spark.implicits._
    val magPapers: Dataset[MagPapers] = spark.read.option("multiline", true).schema(schema).json(path).as[MagPapers]
    val ret: Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers)

    assertTrue(ret.count == 8)
    ret.take(8).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase())))
  } finally {
    // always release the local Spark session, even when an assertion above fails
    spark.close()
  }
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -12,6 +12,8 @@ import org.slf4j.{Logger, LoggerFactory}
|
|||
import java.nio.file.Path
|
||||
import scala.io.Source
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
|
||||
class MappingORCIDToOAFTest {
|
||||
val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass)
|
||||
val mapper = new ObjectMapper()
|
||||
|
@ -63,9 +65,26 @@ class MappingORCIDToOAFTest {
|
|||
}
|
||||
|
||||
|
||||
/**
 * Builds an ORCIDItem with an upper-case DOI and three authors, converts it
 * with `ORCIDToOAF.convertTOOAF`, and expects a single pid of class "doi"
 * whose value is the lower-cased DOI.
 */
@Test
def testExtractDat1(): Unit = {
  val authors: List[OrcidAuthor] = List(
    OrcidAuthor("0000-0002-4335-5309", Some("Lucrecia"), Some("Curto"), null, null, null),
    OrcidAuthor("0000-0001-7501-3330", Some("Emilio"), Some("Malchiodi"), null, null, null),
    OrcidAuthor("0000-0002-5490-9186", Some("Sofia"), Some("Noli Truant"), null, null, null)
  )
  val item: ORCIDItem = ORCIDItem("10.1042/BCJ20160876", authors)

  val oaf = ORCIDToOAF.convertTOOAF(item)
  val expectedDoi = "10.1042/BCJ20160876".toLowerCase()

  assert(oaf.getPid.size() == 1)
  for (pid <- oaf.getPid.toList) {
    assert(pid.getQualifier.getClassid.equals("doi"))
  }
  for (pid <- oaf.getPid.toList) {
    assert(pid.getValue.equals(expectedDoi))
  }
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -20,16 +20,26 @@ class UnpayWallMappingTest {
|
|||
|
||||
val Ilist = Source.fromInputStream(getClass.getResourceAsStream("input.json")).mkString
|
||||
|
||||
|
||||
var i:Int = 0
|
||||
for (line <-Ilist.lines) {
|
||||
val p = UnpayWallToOAF.convertToOAF(line)
|
||||
|
||||
if(p!= null) {
|
||||
assertTrue(p.getInstance().size()==1)
|
||||
if (i== 0){
|
||||
assertTrue(p.getPid.get(0).getValue.equals("10.1038/2211089b0"))
|
||||
}
|
||||
if (i== 1){
|
||||
assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00058.s001"))
|
||||
}
|
||||
if (i== 2){
|
||||
assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00086.s001"))
|
||||
}
|
||||
logger.info(s"ID : ${p.getId}")
|
||||
}
|
||||
assertNotNull(line)
|
||||
assertTrue(line.nonEmpty)
|
||||
i = i+1
|
||||
}
|
||||
|
||||
|
||||
|
@ -39,7 +49,9 @@ class UnpayWallMappingTest {
|
|||
val item = UnpayWallToOAF.convertToOAF(l)
|
||||
|
||||
assertEquals(item.getInstance().get(0).getAccessright.getOpenAccessRoute, OpenAccessRoute.bronze)
|
||||
|
||||
logger.info(mapper.writeValueAsString(item))
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46",
|
||||
"DOI": " 10.26850/1678-4618eqj.v35.1.2010.p41-46",
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46",
|
||||
"DOI": "10.26850/1678-4618EQJ.v35.1.2010.p41-46",
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
[{"PaperId":2866429360,"Rank":1,"Doi":"10.5465/AMBPP.2018.12619SYMPOSIUM","DocType":null,"PaperTitle":"new directions in research on conflict dynamics","OriginalTitle":"New Directions in Research on Conflict Dynamics","BookTitle":null,"Year":2018,"Date":"2018-07-09T00:00:00Z","Publisher":"Academy of Management Briarcliff Manor, NY 10510","JournalId":null,"Volume":"2018","Issue":"1","FirstPage":"12619","LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Academy of Management Proceedings","CreatedDate":"2018-07-19T00:00:00Z"},
|
||||
{"PaperId":2871494677,"Rank":2,"Doi":"10.1007/978-981-10-8971-8_33","DocType":null,"PaperTitle":"wild flame detection using weight adaptive particle filter from monocular video","OriginalTitle":"Wild Flame Detection Using Weight Adaptive Particle Filter from Monocular Video","BookTitle":null,"Year":2019,"Date":"2019-01-01T00:00:00Z","Publisher":"Springer, Singapore","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"357","LastPage":"365","ReferenceCount":14,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-07-19T00:00:00Z"},
|
||||
{"PaperId":2883520096,"Rank":3,"Doi":"10.5465/AMBPP .2018.12619SYMPOSIUM","DocType":"Journal","PaperTitle":"elaboracion de un corpus cacografico desde la disponibilidad lexica en estudiantes sevillanos un analisis para la ensenanza de la lengua","OriginalTitle":"Elaboración de un corpus cacográfico desde la disponibilidad léxica en estudiantes sevillanos. Un análisis para la enseñanza de la lengua","BookTitle":null,"Year":2018,"Date":"2018-07-13T00:00:00Z","Publisher":"Poli papers","JournalId":2738339871,"Volume":"13","Issue":"1","FirstPage":"119","LastPage":"131","ReferenceCount":28,"CitationCount":2,"EstimatedCitation":2,"OriginalVenue":"Revista de Lingüística y Lenguas Aplicadas","CreatedDate":"2018-08-03T00:00:00Z"},
|
||||
{"PaperId":2883800636,"Rank":4,"Doi":"10.1007/978-3-319-92513-4_4","DocType":null,"PaperTitle":"cognitive advantage of bilingualism and its criticisms","OriginalTitle":"Cognitive Advantage of Bilingualism and Its Criticisms","BookTitle":null,"Year":2018,"Date":"2018-01-01T00:00:00Z","Publisher":"Springer, Cham","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"67","LastPage":"89","ReferenceCount":74,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-08-03T00:00:00Z"},
|
||||
{"PaperId":2885023064,"Rank":5,"Doi":"10.1097/NNA.0000000000000647","DocType":"Journal","PaperTitle":"enhancing and advancing shared governance through a targeted decision making redesign","OriginalTitle":"Enhancing and Advancing Shared Governance Through a Targeted Decision-Making Redesign.","BookTitle":null,"Year":2018,"Date":"2018-09-01T00:00:00Z","Publisher":"J Nurs Adm","JournalId":194945867,"Volume":"48","Issue":"9","FirstPage":"445","LastPage":"451","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Journal of Nursing Administration","CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2885607541,"Rank":1,"Doi":"10.1007/S10465-018-9283-7","DocType":"Journal","PaperTitle":"dance movement therapists attitudes and actions regarding lgbtqi and gender nonconforming communities","OriginalTitle":"Dance/Movement Therapists’ Attitudes and Actions Regarding LGBTQI and Gender Nonconforming Communities","BookTitle":null,"Year":2018,"Date":"2018-08-07T00:00:00Z","Publisher":"Springer US","JournalId":104993962,"Volume":"40","Issue":"2","FirstPage":"202","LastPage":"223","ReferenceCount":40,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"American Journal of Dance Therapy","CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2886182429,"Rank":2,"Doi":"10.13039/501100003329","DocType":null,"PaperTitle":"caracteres de adaptacion en judia comun phaseolus vulgaris l aproximacion genetica e identificacion de qtls","OriginalTitle":"Caracteres de adaptación en judía común (Phaseolus vulgaris L.): aproximación genética e identificación de QTLs","BookTitle":null,"Year":2017,"Date":"2017-06-15T00:00:00Z","Publisher":"CSIC - Misión Biológica de Galicia (MBG)","JournalId":null,"Volume":null,"Issue":null,"FirstPage":null,"LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":null,"CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2887149460,"Rank":3,"Doi":"10.1093/FEMSLE/FNY192","DocType":"Journal","PaperTitle":"small extracellular particles with big potential for horizontal gene transfer membrane vesicles and gene transfer agents","OriginalTitle":"Small extracellular particles with big potential for horizontal gene transfer: membrane vesicles and gene transfer agents.","BookTitle":null,"Year":2018,"Date":"2018-10-01T00:00:00Z","Publisher":"Narnia","JournalId":34954451,"Volume":"365","Issue":"19","FirstPage":null,"LastPage":null,"ReferenceCount":124,"CitationCount":13,"EstimatedCitation":13,"OriginalVenue":"Fems Microbiology Letters","CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2887446149,"Rank":4,"Doi":"10.5465/ambpp.2018.12619symposium","DocType":"Journal","PaperTitle":"notes from the field toxigenic vibrio cholerae o141 in a traveler to florida nebraska 2017","OriginalTitle":"Notes from the Field: Toxigenic Vibrio cholerae O141 in a Traveler to Florida — Nebraska, 2017","BookTitle":null,"Year":2018,"Date":"2018-08-03T00:00:00Z","Publisher":"Centers for Disease Control MMWR Office","JournalId":183158886,"Volume":"67","Issue":"30","FirstPage":"838","LastPage":"839","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Morbidity and Mortality Weekly Report","CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2889180499,"Rank":5,"Doi":"10.1007/S10924-018-1299-Z","DocType":"Journal","PaperTitle":"hybrid adsorbent materials obtained by the combination of poly ethylene alt maleic anhydride with lignin and lignosulfonate","OriginalTitle":"Hybrid Adsorbent Materials Obtained by the Combination of Poly(ethylene-alt-maleic anhydride) with Lignin and Lignosulfonate","BookTitle":null,"Year":2018,"Date":"2018-08-30T00:00:00Z","Publisher":"Springer US","JournalId":193665811,"Volume":"26","Issue":"11","FirstPage":"4293","LastPage":"4302","ReferenceCount":29,"CitationCount":5,"EstimatedCitation":5,"OriginalVenue":"Journal of Polymers and The Environment","CreatedDate":"2018-09-07T00:00:00Z"}]
|
|
@ -0,0 +1,10 @@
|
|||
[{"PaperId":2866429360,"Rank":1,"Doi":"10.5465/AMBPP.2018.12619SYMPOSIUM","DocType":null,"PaperTitle":"new directions in research on conflict dynamics","OriginalTitle":"New Directions in Research on Conflict Dynamics","BookTitle":null,"Year":2018,"Date":"2018-07-09T00:00:00Z","Publisher":"Academy of Management Briarcliff Manor, NY 10510","JournalId":null,"Volume":"2018","Issue":"1","FirstPage":"12619","LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Academy of Management Proceedings","CreatedDate":"2018-07-19T00:00:00Z"},
|
||||
{"PaperId":2871494677,"Rank":2,"Doi":"10.1007/978-981-10-8971-8_33","DocType":null,"PaperTitle":"wild flame detection using weight adaptive particle filter from monocular video","OriginalTitle":"Wild Flame Detection Using Weight Adaptive Particle Filter from Monocular Video","BookTitle":null,"Year":2019,"Date":"2019-01-01T00:00:00Z","Publisher":"Springer, Singapore","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"357","LastPage":"365","ReferenceCount":14,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-07-19T00:00:00Z"},
|
||||
{"PaperId":2883520096,"Rank":3,"Doi":"10.4995/RLYLA.2018.9176","DocType":"Journal","PaperTitle":"elaboracion de un corpus cacografico desde la disponibilidad lexica en estudiantes sevillanos un analisis para la ensenanza de la lengua","OriginalTitle":"Elaboración de un corpus cacográfico desde la disponibilidad léxica en estudiantes sevillanos. Un análisis para la enseñanza de la lengua","BookTitle":null,"Year":2018,"Date":"2018-07-13T00:00:00Z","Publisher":"Poli papers","JournalId":2738339871,"Volume":"13","Issue":"1","FirstPage":"119","LastPage":"131","ReferenceCount":28,"CitationCount":2,"EstimatedCitation":2,"OriginalVenue":"Revista de Lingüística y Lenguas Aplicadas","CreatedDate":"2018-08-03T00:00:00Z"},
|
||||
{"PaperId":2883800636,"Rank":4,"Doi":"10.1007/978-3-319-92513-4_4","DocType":null,"PaperTitle":"cognitive advantage of bilingualism and its criticisms","OriginalTitle":"Cognitive Advantage of Bilingualism and Its Criticisms","BookTitle":null,"Year":2018,"Date":"2018-01-01T00:00:00Z","Publisher":"Springer, Cham","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"67","LastPage":"89","ReferenceCount":74,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-08-03T00:00:00Z"},
|
||||
{"PaperId":2885023064,"Rank":5,"Doi":"10.1097/NNA.0000000000000647","DocType":"Journal","PaperTitle":"enhancing and advancing shared governance through a targeted decision making redesign","OriginalTitle":"Enhancing and Advancing Shared Governance Through a Targeted Decision-Making Redesign.","BookTitle":null,"Year":2018,"Date":"2018-09-01T00:00:00Z","Publisher":"J Nurs Adm","JournalId":194945867,"Volume":"48","Issue":"9","FirstPage":"445","LastPage":"451","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Journal of Nursing Administration","CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2885607541,"Rank":1,"Doi":"10.1007/S10465-018-9283-7","DocType":"Journal","PaperTitle":"dance movement therapists attitudes and actions regarding lgbtqi and gender nonconforming communities","OriginalTitle":"Dance/Movement Therapists’ Attitudes and Actions Regarding LGBTQI and Gender Nonconforming Communities","BookTitle":null,"Year":2018,"Date":"2018-08-07T00:00:00Z","Publisher":"Springer US","JournalId":104993962,"Volume":"40","Issue":"2","FirstPage":"202","LastPage":"223","ReferenceCount":40,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"American Journal of Dance Therapy","CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2886182429,"Rank":2,"Doi":"10.13039/501100003329","DocType":null,"PaperTitle":"caracteres de adaptacion en judia comun phaseolus vulgaris l aproximacion genetica e identificacion de qtls","OriginalTitle":"Caracteres de adaptación en judía común (Phaseolus vulgaris L.): aproximación genética e identificación de QTLs","BookTitle":null,"Year":2017,"Date":"2017-06-15T00:00:00Z","Publisher":"CSIC - Misión Biológica de Galicia (MBG)","JournalId":null,"Volume":null,"Issue":null,"FirstPage":null,"LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":null,"CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2887149460,"Rank":3,"Doi":"10.1093/FEMSLE/FNY192","DocType":"Journal","PaperTitle":"small extracellular particles with big potential for horizontal gene transfer membrane vesicles and gene transfer agents","OriginalTitle":"Small extracellular particles with big potential for horizontal gene transfer: membrane vesicles and gene transfer agents.","BookTitle":null,"Year":2018,"Date":"2018-10-01T00:00:00Z","Publisher":"Narnia","JournalId":34954451,"Volume":"365","Issue":"19","FirstPage":null,"LastPage":null,"ReferenceCount":124,"CitationCount":13,"EstimatedCitation":13,"OriginalVenue":"Fems Microbiology Letters","CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2887446149,"Rank":4,"Doi":"10.15585/MMWR.MM6730A7","DocType":"Journal","PaperTitle":"notes from the field toxigenic vibrio cholerae o141 in a traveler to florida nebraska 2017","OriginalTitle":"Notes from the Field: Toxigenic Vibrio cholerae O141 in a Traveler to Florida — Nebraska, 2017","BookTitle":null,"Year":2018,"Date":"2018-08-03T00:00:00Z","Publisher":"Centers for Disease Control MMWR Office","JournalId":183158886,"Volume":"67","Issue":"30","FirstPage":"838","LastPage":"839","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Morbidity and Mortality Weekly Report","CreatedDate":"2018-08-22T00:00:00Z"},
|
||||
{"PaperId":2889180499,"Rank":5,"Doi":"10.1007/S10924-018-1299-Z","DocType":"Journal","PaperTitle":"hybrid adsorbent materials obtained by the combination of poly ethylene alt maleic anhydride with lignin and lignosulfonate","OriginalTitle":"Hybrid Adsorbent Materials Obtained by the Combination of Poly(ethylene-alt-maleic anhydride) with Lignin and Lignosulfonate","BookTitle":null,"Year":2018,"Date":"2018-08-30T00:00:00Z","Publisher":"Springer US","JournalId":193665811,"Volume":"26","Issue":"11","FirstPage":"4293","LastPage":"4302","ReferenceCount":29,"CitationCount":5,"EstimatedCitation":5,"OriginalVenue":"Journal of Polymers and The Environment","CreatedDate":"2018-09-07T00:00:00Z"}]
|
|
@ -1,6 +1,6 @@
|
|||
{"doi": "10.1038/2211089b0", "year": 1969, "genre": "journal-article", "is_oa": true, "title": "Planning: Trees in Danger", "doi_url": "https://doi.org/10.1038/2211089b0", "updated": "2020-02-06T13:51:15.164623", "oa_status": "bronze", "publisher": "Springer Nature", "z_authors": [{"name": "Our Planning Correspondent"}], "is_paratext": false, "journal_name": "Nature", "oa_locations": [{"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0028-0836,1476-4687", "journal_issn_l": "0028-0836", "published_date": "1969-03-01", "best_oa_location": {"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1021/acs.bioconjchem.8b00058.s001", "year": null, "genre": "component", "is_oa": true, "title": "Engineering Reversible CellCell Interactions with Lipid Anchored Prosthetic Receptors", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "updated": "2020-04-04T21:15:41.966773", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1021/acs.bioconjchem.8b00086.s001", "year": null, "genre": "component", "is_oa": true, "title": "Rapid, Stoichiometric, Site-Specific Modification of Aldehyde-Containing Proteins Using a Tandem Knoevenagel-Intra Michael Addition Reaction", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "updated": "2020-04-04T21:24:50.688286", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1038/221 1089b0", "year": 1969, "genre": "journal-article", "is_oa": true, "title": "Planning: Trees in Danger", "doi_url": "https://doi.org/10.1038/2211089b0", "updated": "2020-02-06T13:51:15.164623", "oa_status": "bronze", "publisher": "Springer Nature", "z_authors": [{"name": "Our Planning Correspondent"}], "is_paratext": false, "journal_name": "Nature", "oa_locations": [{"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0028-0836,1476-4687", "journal_issn_l": "0028-0836", "published_date": "1969-03-01", "best_oa_location": {"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1021/acs.bioconjchem.8b00058. s001", "year": null, "genre": "component", "is_oa": true, "title": "Engineering Reversible CellCell Interactions with Lipid Anchored Prosthetic Receptors", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "updated": "2020-04-04T21:15:41.966773", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1021/acs.bioconjCHEM.8b00086.s001", "year": null, "genre": "component", "is_oa": true, "title": "Rapid, Stoichiometric, Site-Specific Modification of Aldehyde-Containing Proteins Using a Tandem Knoevenagel-Intra Michael Addition Reaction", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "updated": "2020-04-04T21:24:50.688286", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1192/bjp.89.375.270", "year": 1943, "genre": "journal-article", "is_oa": false, "title": "Unusual Pituitary Activity in a Case of Anorexia Nervosa", "doi_url": "https://doi.org/10.1192/bjp.89.375.270", "updated": "2020-03-09T08:54:12.827623", "oa_status": "closed", "publisher": "Royal College of Psychiatrists", "z_authors": [{"given": "M.", "family": "Reiss", "sequence": "first"}], "is_paratext": false, "journal_name": "Journal of Mental Science", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0368-315X,2514-9946", "journal_issn_l": "0368-315X", "published_date": "1943-04-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1016/s0167-7012(99)00056-1", "year": 1999, "genre": "journal-article", "is_oa": false, "title": "Development of radiographic and microscopic techniques for the characterization of bacterial transport in intact sediment cores from Oyster, Virginia", "doi_url": "https://doi.org/10.1016/s0167-7012(99)00056-1", "updated": "2020-04-05T11:15:40.634599", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Hailiang", "family": "Dong", "sequence": "first"}, {"given": "Tullis C.", "family": "Onstott", "sequence": "additional"}, {"given": "Mary F.", "family": "DeFlaun", "sequence": "additional"}, {"given": "Mark E.", "family": "Fuller", "sequence": "additional"}, {"given": "Kathleen M.", "family": "Gillespie", "sequence": "additional"}, {"given": "James K.", "family": "Fredrickson", "sequence": "additional"}], "is_paratext": false, "journal_name": "Journal of Microbiological Methods", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0167-7012", "journal_issn_l": "0167-7012", "published_date": "1999-08-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1086/mp.1905.2.issue-3", "year": 1905, "genre": "journal-issue", "is_oa": false, "title": null, "doi_url": "https://doi.org/10.1086/mp.1905.2.issue-3", "updated": "2020-02-07T15:51:44.560109", "oa_status": "closed", "publisher": "University of Chicago Press", "z_authors": null, "is_paratext": false, "journal_name": "Modern Philology", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0026-8232,1545-6951", "journal_issn_l": "0026-8232", "published_date": "1905-01-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
|
@ -38,7 +38,7 @@
|
|||
{"doi": "10.1016/s1067-991x(03)70006-6", "year": 2003, "genre": "journal-article", "is_oa": false, "title": "Use of the autolaunch method of dispatching a helicopter", "doi_url": "https://doi.org/10.1016/s1067-991x(03)70006-6", "updated": "2020-03-12T07:24:35.659404", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Kathleen S.", "family": "Berns", "sequence": "first"}, {"given": "Jeffery J.", "family": "Caniglia", "sequence": "additional"}, {"given": "Daniel G.", "family": "Hankins", "sequence": "additional"}, {"given": "Scott P.", "family": "Zietlow", "sequence": "additional"}], "is_paratext": false, "journal_name": "Air Medical Journal", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1067-991X", "journal_issn_l": "1067-991X", "published_date": "2003-05-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1016/j.clinimag.2015.12.002", "year": 2016, "genre": "journal-article", "is_oa": false, "title": "Imaging findings, diagnosis, and clinical outcomes in patients with mycotic aneurysms: single center experience", "doi_url": "https://doi.org/10.1016/j.clinimag.2015.12.002", "updated": "2020-03-12T17:56:16.049536", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Amy R.", "family": "Deipolyi", "sequence": "first"}, {"given": "Alexander", "family": "Bailin", "sequence": "additional"}, {"given": "Ali", "family": "Khademhosseini", "sequence": "additional"}, {"ORCID": "http://orcid.org/0000-0003-4984-1778", "given": "Rahmi", "family": "Oklu", "sequence": "additional", "authenticated-orcid": false}], "is_paratext": false, "journal_name": "Clinical Imaging", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0899-7071", "journal_issn_l": "0899-7071", "published_date": "2016-05-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1016/j.biocel.2013.05.012", "year": 2013, "genre": "journal-article", "is_oa": false, "title": "MAVS-mediated host cell defense is inhibited by Borna disease virus", "doi_url": "https://doi.org/10.1016/j.biocel.2013.05.012", "updated": "2020-03-09T20:49:25.975316", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Yujun", "family": "Li", "sequence": "first"}, {"given": "Wuqi", "family": "Song", "sequence": "additional"}, {"given": "Jing", "family": "Wu", "sequence": "additional"}, {"given": "Qingmeng", "family": "Zhang", "sequence": "additional"}, {"given": "Junming", "family": "He", "sequence": "additional"}, {"given": "Aimei", "family": "Li", "sequence": "additional"}, {"given": "Jun", "family": "Qian", "sequence": "additional"}, {"given": "Aixia", "family": "Zhai", "sequence": "additional"}, {"given": "Yunlong", "family": "Hu", "sequence": "additional"}, {"given": "Wenping", "family": "Kao", "sequence": "additional"}, {"given": "Lanlan", "family": "Wei", "sequence": "additional"}, {"given": "Fengmin", "family": "Zhang", "sequence": "additional"}, {"given": "Dakang", "family": "Xu", "sequence": "additional"}], "is_paratext": false, "journal_name": "The International Journal of Biochemistry & Cell Biology", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1357-2725", "journal_issn_l": "1357-2725", "published_date": "2013-08-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1021/acsami.8b01074.s004", "year": null, "genre": "component", "is_oa": false, "title": "Solution Coating of Pharmaceutical Nanothin Films and Multilayer Nanocomposites with Controlled Morphology and Polymorphism", "doi_url": "https://doi.org/10.1021/acsami.8b01074.s004", "updated": "2020-04-04T21:02:07.815195", "oa_status": "closed", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1021/acsami.8b01074 .s004", "year": null, "genre": "component", "is_oa": false, "title": "Solution Coating of Pharmaceutical Nanothin Films and Multilayer Nanocomposites with Controlled Morphology and Polymorphism", "doi_url": "https://doi.org/10.1021/acsami.8b01074.s004", "updated": "2020-04-04T21:02:07.815195", "oa_status": "closed", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
{"doi": "10.1093/nar/18.18.5552", "year": 1990, "genre": "journal-article", "is_oa": true, "title": "Nucleotide sequence of LTR-gag region of Rous sarcoma virus adapted to semi-permissive host", "doi_url": "https://doi.org/10.1093/nar/18.18.5552", "updated": "2020-02-07T07:59:06.754183", "oa_status": "green", "publisher": "Oxford University Press (OUP)", "z_authors": [{"given": "Vladimir I.", "family": "Kashuba", "sequence": "first"}, {"given": "Serge V.", "family": "Zubak", "sequence": "additional"}, {"given": "Vadim M.", "family": "Kavsan", "sequence": "additional"}, {"given": "Alla V.", "family": "Rynditch", "sequence": "additional"}, {"given": "Ivo", "family": "Hlozanek", "sequence": "additional"}], "is_paratext": false, "journal_name": "Nucleic Acids Research", "oa_locations": [{"url": "http://europepmc.org/articles/pmc332244?pdf=render", "pmh_id": "oai:pubmedcentral.nih.gov:332244", "is_best": true, "license": null, "updated": "2017-10-22T11:38:23.025497", "version": "publishedVersion", "evidence": "oa repository (via OAI-PMH doi match)", "host_type": "repository", "endpoint_id": "pubmedcentral.nih.gov", "url_for_pdf": "http://europepmc.org/articles/pmc332244?pdf=render", "url_for_landing_page": "http://europepmc.org/articles/pmc332244", "repository_institution": "pubmedcentral.nih.gov"}, {"url": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC332244", "pmh_id": null, "is_best": false, "license": null, "updated": "2020-04-24T18:18:02.810779", "version": "publishedVersion", "evidence": "oa repository (via pmcid lookup)", "host_type": "repository", "endpoint_id": null, "url_for_pdf": null, "url_for_landing_page": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC332244", "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0305-1048,1362-4962", "journal_issn_l": "0305-1048", "published_date": "1990-01-01", "best_oa_location": {"url": "http://europepmc.org/articles/pmc332244?pdf=render", "pmh_id": 
"oai:pubmedcentral.nih.gov:332244", "is_best": true, "license": null, "updated": "2017-10-22T11:38:23.025497", "version": "publishedVersion", "evidence": "oa repository (via OAI-PMH doi match)", "host_type": "repository", "endpoint_id": "pubmedcentral.nih.gov", "url_for_pdf": "http://europepmc.org/articles/pmc332244?pdf=render", "url_for_landing_page": "http://europepmc.org/articles/pmc332244", "repository_institution": "pubmedcentral.nih.gov"}, "journal_is_in_doaj": false, "has_repository_copy": true}
|
||||
{"doi": "10.1021/acsami.8b01294.s001", "year": null, "genre": "component", "is_oa": true, "title": "Highly Elastic Biodegradable Single-Network Hydrogel for Cell Printing", "doi_url": "https://doi.org/10.1021/acsami.8b01294.s001", "updated": "2020-04-04T22:12:53.813586", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acsami.8b01294.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T22:11:06.757648", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acsami.8b01294.s001", "url_for_landing_page": null, "repository_institution": null}, {"url": "http://europepmc.org/articles/pmc5876623?pdf=render", "pmh_id": "oai:pubmedcentral.nih.gov:5876623", "is_best": false, "license": "acs-specific: authorchoice/editors choice usage agreement", "updated": "2020-02-19T13:50:59.876849", "version": "publishedVersion", "evidence": "oa repository (via OAI-PMH title match)", "host_type": "repository", "endpoint_id": "ac9de7698155b820de7", "url_for_pdf": "http://europepmc.org/articles/pmc5876623?pdf=render", "url_for_landing_page": "http://europepmc.org/articles/pmc5876623", "repository_institution": "National Institutes of Health (USA) - US National Library of Medicine"}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acsami.8b01294.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T22:11:06.757648", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acsami.8b01294.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, 
"has_repository_copy": true}
|
||||
{"doi": "10.1097/scs.0b013e3181ef67ba", "year": 2010, "genre": "journal-article", "is_oa": false, "title": "Anomaly of the Internal Carotid Artery Detected During Tonsillectomy", "doi_url": "https://doi.org/10.1097/scs.0b013e3181ef67ba", "updated": "2020-02-10T19:05:26.462040", "oa_status": "closed", "publisher": "Ovid Technologies (Wolters Kluwer Health)", "z_authors": [{"given": "Serdar", "family": "Ceylan", "sequence": "first"}, {"given": "Serkan", "family": "Salman", "sequence": "additional"}, {"given": "Fatih", "family": "Bora", "sequence": "additional"}], "is_paratext": false, "journal_name": "Journal of Craniofacial Surgery", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1049-2275", "journal_issn_l": "1049-2275", "published_date": "2010-09-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
<configuration>
|
||||
<args>
|
||||
<arg>-Xmax-classfile-name</arg>
|
||||
<arg>140</arg>
|
||||
<arg>200</arg>
|
||||
</args>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
|
|
|
@ -3,8 +3,12 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
|||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
@ -98,14 +102,9 @@ public class MergeClaimsApplication {
|
|||
raw
|
||||
.joinWith(claim, raw.col("_1").equalTo(claim.col("_1")), "full_outer")
|
||||
.map(
|
||||
(MapFunction<Tuple2<Tuple2<String, T>, Tuple2<String, T>>, T>) value -> {
|
||||
Optional<Tuple2<String, T>> opRaw = Optional.ofNullable(value._1());
|
||||
Optional<Tuple2<String, T>> opClaim = Optional.ofNullable(value._2());
|
||||
|
||||
return opRaw.isPresent()
|
||||
? opRaw.get()._2()
|
||||
: opClaim.isPresent() ? opClaim.get()._2() : null;
|
||||
},
|
||||
(MapFunction<Tuple2<Tuple2<String, T>, Tuple2<String, T>>, T>) value -> processClaims(
|
||||
Optional.ofNullable(value._1()),
|
||||
Optional.ofNullable(value._2())),
|
||||
Encoders.bean(clazz))
|
||||
.filter(Objects::nonNull)
|
||||
.map(
|
||||
|
@ -117,6 +116,78 @@ public class MergeClaimsApplication {
|
|||
.text(outPath);
|
||||
}
|
||||
|
||||
private static <T extends Oaf> T processClaims(Optional<Tuple2<String, T>> opRaw,
|
||||
Optional<Tuple2<String, T>> opClaim) {
|
||||
|
||||
// when both are present
|
||||
if (opClaim.isPresent() && opRaw.isPresent()) {
|
||||
T oafClaim = opClaim.get()._2();
|
||||
if (oafClaim instanceof Result) {
|
||||
T oafRaw = opRaw.get()._2();
|
||||
|
||||
// merge the context lists from both oaf objects ...
|
||||
final List<Context> context = mergeContexts((Result) oafClaim, (Result) oafRaw);
|
||||
|
||||
// ... and set it on the result from the aggregator
|
||||
((Result) oafRaw).setContext(context);
|
||||
return oafRaw;
|
||||
}
|
||||
}
|
||||
|
||||
// otherwise prefer the result from the aggregator
|
||||
return opRaw.isPresent()
|
||||
? opRaw.get()._2()
|
||||
: opClaim.map(Tuple2::_2).orElse(null);
|
||||
}
|
||||
|
||||
private static List<Context> mergeContexts(Result oafClaim, Result oafRaw) {
|
||||
return new ArrayList<>(
|
||||
Stream
|
||||
.concat(
|
||||
Optional
|
||||
.ofNullable(oafClaim.getContext())
|
||||
.map(List::stream)
|
||||
.orElse(Stream.empty()),
|
||||
Optional
|
||||
.ofNullable(oafRaw.getContext())
|
||||
.map(List::stream)
|
||||
.orElse(Stream.empty()))
|
||||
.collect(
|
||||
Collectors
|
||||
.toMap(
|
||||
Context::getId,
|
||||
c -> c,
|
||||
(c1, c2) -> {
|
||||
Context c = new Context();
|
||||
c.setId(c1.getId());
|
||||
c
|
||||
.setDataInfo(
|
||||
new ArrayList<>(
|
||||
Stream
|
||||
.concat(
|
||||
Optional
|
||||
.ofNullable(c1.getDataInfo())
|
||||
.map(List::stream)
|
||||
.orElse(Stream.empty()),
|
||||
Optional
|
||||
.ofNullable(c2.getDataInfo())
|
||||
.map(List::stream)
|
||||
.orElse(Stream.empty()))
|
||||
.collect(
|
||||
Collectors
|
||||
.toMap(
|
||||
d -> Optional
|
||||
.ofNullable(d.getProvenanceaction())
|
||||
.map(Qualifier::getClassid)
|
||||
.orElse(""),
|
||||
d -> d,
|
||||
(d1, d2) -> d1))
|
||||
.values()));
|
||||
return c;
|
||||
}))
|
||||
.values());
|
||||
}
|
||||
|
||||
private static <T extends Oaf> Dataset<T> readFromPath(
|
||||
SparkSession spark, String path, Class<T> clazz) {
|
||||
return spark
|
||||
|
|
|
@ -480,38 +480,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
|
||||
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
|
||||
|
||||
final Relation r1 = new Relation();
|
||||
final Relation r2 = new Relation();
|
||||
|
||||
if (StringUtils.isNotBlank(validationDate)) {
|
||||
r1.setValidated(true);
|
||||
r1.setValidationDate(validationDate);
|
||||
r2.setValidated(true);
|
||||
r2.setValidationDate(validationDate);
|
||||
}
|
||||
r1.setCollectedfrom(COLLECTED_FROM_CLAIM);
|
||||
r1.setSource(sourceId);
|
||||
r1.setTarget(targetId);
|
||||
r1.setDataInfo(DATA_INFO_CLAIM);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
r2.setCollectedfrom(COLLECTED_FROM_CLAIM);
|
||||
r2.setSource(targetId);
|
||||
r2.setTarget(sourceId);
|
||||
r2.setDataInfo(DATA_INFO_CLAIM);
|
||||
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
Relation r1 = prepareRelation(sourceId, targetId, validationDate);
|
||||
Relation r2 = prepareRelation(targetId, sourceId, validationDate);
|
||||
|
||||
final String semantics = rs.getString("semantics");
|
||||
|
||||
switch (semantics) {
|
||||
case "resultResult_relationship_isRelatedTo":
|
||||
r1.setRelType(RESULT_RESULT);
|
||||
r1.setSubRelType(RELATIONSHIP);
|
||||
r1.setRelClass(IS_RELATED_TO);
|
||||
|
||||
r2.setRelType(RESULT_RESULT);
|
||||
r2.setSubRelType(RELATIONSHIP);
|
||||
r2.setRelClass(IS_RELATED_TO);
|
||||
r1 = setRelationSemantic(r1, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
|
||||
r2 = setRelationSemantic(r2, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
|
||||
break;
|
||||
case "resultProject_outcome_produces":
|
||||
if (!"project".equals(sourceType)) {
|
||||
|
@ -521,13 +498,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
"invalid claim, sourceId: %s, targetId: %s, semantics: %s",
|
||||
sourceId, targetId, semantics));
|
||||
}
|
||||
r1.setRelType(RESULT_PROJECT);
|
||||
r1.setSubRelType(OUTCOME);
|
||||
r1.setRelClass(PRODUCES);
|
||||
|
||||
r2.setRelType(RESULT_PROJECT);
|
||||
r2.setSubRelType(OUTCOME);
|
||||
r2.setRelClass(IS_PRODUCED_BY);
|
||||
r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
|
||||
r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
|
||||
break;
|
||||
case "resultResult_publicationDataset_isRelatedTo":
|
||||
r1 = setRelationSemantic(r1, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
|
||||
r2 = setRelationSemantic(r2, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("claim semantics not managed: " + semantics);
|
||||
|
@ -540,6 +516,27 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
}
|
||||
}
|
||||
|
||||
private Relation prepareRelation(String sourceId, String targetId, String validationDate) {
|
||||
Relation r = new Relation();
|
||||
if (StringUtils.isNotBlank(validationDate)) {
|
||||
r.setValidated(true);
|
||||
r.setValidationDate(validationDate);
|
||||
}
|
||||
r.setCollectedfrom(COLLECTED_FROM_CLAIM);
|
||||
r.setSource(sourceId);
|
||||
r.setTarget(targetId);
|
||||
r.setDataInfo(DATA_INFO_CLAIM);
|
||||
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
return r;
|
||||
}
|
||||
|
||||
private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) {
|
||||
r.setRelType(relType);
|
||||
r.setSubRelType(subRelType);
|
||||
r.setRelClass(relClass);
|
||||
return r;
|
||||
}
|
||||
|
||||
private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
|
||||
final Context context = new Context();
|
||||
context.setId(id);
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.provision;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpDelete;
|
||||
|
@ -12,96 +14,104 @@ import org.apache.http.impl.client.HttpClients;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class DropAndCreateESIndex {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(DropAndCreateESIndex.class);
|
||||
public static final String STATUS_CODE_TEXT = "status code: {}";
|
||||
public static final String APPLICATION_JSON = "application/json";
|
||||
private static final Logger log = LoggerFactory.getLogger(DropAndCreateESIndex.class);
|
||||
public static final String STATUS_CODE_TEXT = "status code: {}";
|
||||
public static final String APPLICATION_JSON = "application/json";
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects.requireNonNull(DropAndCreateESIndex.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/sx/provision/dropAndCreateIndex.json"))));
|
||||
parser.parseArgument(args);
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
DropAndCreateESIndex.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/sx/provision/dropAndCreateIndex.json"))));
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String index = parser.get("index");
|
||||
final String index = parser.get("index");
|
||||
|
||||
final String cluster = parser.get("cluster");
|
||||
final String clusterJson = IOUtils
|
||||
.toString(Objects.requireNonNull(DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json")));
|
||||
final String cluster = parser.get("cluster");
|
||||
final String clusterJson = IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json")));
|
||||
|
||||
Map<String, String> clusterMap = new ObjectMapper().readValue(clusterJson, Map.class);
|
||||
|
||||
Map<String,String> clusterMap = new ObjectMapper().readValue(clusterJson,Map.class );
|
||||
final String ip = clusterMap.get(cluster).split(",")[0];
|
||||
|
||||
final String ip = clusterMap.get(cluster).split(",")[0];
|
||||
final String url = "http://%s:9200/%s_%s";
|
||||
|
||||
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
|
||||
final String url = "http://%s:9200/%s_%s";
|
||||
HttpDelete delete = new HttpDelete(String.format(url, ip, index, "object"));
|
||||
|
||||
try(CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
CloseableHttpResponse response = client.execute(delete);
|
||||
|
||||
HttpDelete delete = new HttpDelete(String.format(url, ip, index, "object"));
|
||||
log.info("deleting Index SUMMARY");
|
||||
log.info(STATUS_CODE_TEXT, response.getStatusLine());
|
||||
}
|
||||
|
||||
CloseableHttpResponse response = client.execute(delete);
|
||||
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
|
||||
log.info("deleting Index SUMMARY");
|
||||
log.info(STATUS_CODE_TEXT,response.getStatusLine());
|
||||
}
|
||||
HttpDelete delete = new HttpDelete(String.format(url, ip, index, "scholix"));
|
||||
|
||||
CloseableHttpResponse response = client.execute(delete);
|
||||
|
||||
try(CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
log.info("deleting Index SCHOLIX");
|
||||
log.info(STATUS_CODE_TEXT, response.getStatusLine());
|
||||
}
|
||||
|
||||
HttpDelete delete = new HttpDelete(String.format(url, ip, index, "scholix"));
|
||||
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
|
||||
CloseableHttpResponse response = client.execute(delete);
|
||||
final String summaryConf = IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
DropAndCreateESIndex.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/summary_index.json")));
|
||||
|
||||
log.info("deleting Index SCHOLIX");
|
||||
log.info(STATUS_CODE_TEXT,response.getStatusLine());
|
||||
}
|
||||
HttpPut put = new HttpPut(String.format(url, ip, index, "object"));
|
||||
|
||||
try(CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
StringEntity entity = new StringEntity(summaryConf);
|
||||
put.setEntity(entity);
|
||||
put.setHeader("Accept", APPLICATION_JSON);
|
||||
put.setHeader("Content-type", APPLICATION_JSON);
|
||||
|
||||
final String summaryConf = IOUtils
|
||||
.toString(Objects.requireNonNull(DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/summary_index.json")));
|
||||
log.info("creating First Index SUMMARY");
|
||||
CloseableHttpResponse response = client.execute(put);
|
||||
log.info(STATUS_CODE_TEXT, response.getStatusLine());
|
||||
|
||||
}
|
||||
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
|
||||
final String scholixConf = IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
DropAndCreateESIndex.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/scholix_index.json")));
|
||||
|
||||
HttpPut put = new HttpPut(String.format(url, ip, index, "object"));
|
||||
log.info("creating Index SCHOLIX");
|
||||
final HttpPut put = new HttpPut(String.format(url, ip, index, "scholix"));
|
||||
|
||||
StringEntity entity = new StringEntity(summaryConf);
|
||||
put.setEntity(entity);
|
||||
put.setHeader("Accept", APPLICATION_JSON);
|
||||
put.setHeader("Content-type", APPLICATION_JSON);
|
||||
final StringEntity entity = new StringEntity(scholixConf);
|
||||
put.setEntity(entity);
|
||||
put.setHeader("Accept", APPLICATION_JSON);
|
||||
put.setHeader("Content-type", APPLICATION_JSON);
|
||||
|
||||
log.info("creating First Index SUMMARY");
|
||||
CloseableHttpResponse response = client.execute(put);
|
||||
log.info(STATUS_CODE_TEXT,response.getStatusLine());
|
||||
final CloseableHttpResponse response = client.execute(put);
|
||||
log.info(STATUS_CODE_TEXT, response.getStatusLine());
|
||||
}
|
||||
|
||||
}
|
||||
try(CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
|
||||
final String scholixConf = IOUtils
|
||||
.toString(Objects.requireNonNull(DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/scholix_index.json")));
|
||||
|
||||
|
||||
log.info("creating Index SCHOLIX");
|
||||
final HttpPut put = new HttpPut(String.format(url, ip, index, "scholix"));
|
||||
|
||||
final StringEntity entity = new StringEntity(scholixConf);
|
||||
put.setEntity(entity);
|
||||
put.setHeader("Accept", APPLICATION_JSON);
|
||||
put.setHeader("Content-type", APPLICATION_JSON);
|
||||
|
||||
final CloseableHttpResponse response = client.execute(put);
|
||||
log.info(STATUS_CODE_TEXT, response.getStatusLine());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.provision;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
@ -16,43 +18,48 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|||
|
||||
public class SparkIndexCollectionOnES {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects.requireNonNull(SparkIndexCollectionOnES.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/sx/provision/index_on_es.json"))));
|
||||
parser.parseArgument(args);
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
SparkIndexCollectionOnES.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/sx/provision/index_on_es.json"))));
|
||||
parser.parseArgument(args);
|
||||
|
||||
SparkConf conf = new SparkConf()
|
||||
.setAppName(SparkIndexCollectionOnES.class.getSimpleName())
|
||||
.setMaster(parser.get("master"));
|
||||
SparkConf conf = new SparkConf()
|
||||
.setAppName(SparkIndexCollectionOnES.class.getSimpleName())
|
||||
.setMaster(parser.get("master"));
|
||||
|
||||
final String sourcePath = parser.get("sourcePath");
|
||||
final String index = parser.get("index");
|
||||
final String idPath = parser.get("idPath");
|
||||
final String cluster = parser.get("cluster");
|
||||
final String clusterJson = IOUtils
|
||||
.toString(Objects.requireNonNull(DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json")));
|
||||
final String sourcePath = parser.get("sourcePath");
|
||||
final String index = parser.get("index");
|
||||
final String idPath = parser.get("idPath");
|
||||
final String cluster = parser.get("cluster");
|
||||
final String clusterJson = IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json")));
|
||||
|
||||
final Map<String, String> clusterMap = new ObjectMapper().readValue(clusterJson, Map.class);
|
||||
final Map<String, String> clusterMap = new ObjectMapper().readValue(clusterJson, Map.class);
|
||||
|
||||
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
|
||||
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
|
||||
|
||||
try (final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext())) {
|
||||
try (final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext())) {
|
||||
|
||||
JavaRDD<String> inputRdd = sc.textFile(sourcePath);
|
||||
JavaRDD<String> inputRdd = sc.textFile(sourcePath);
|
||||
|
||||
Map<String, String> esCfg = new HashMap<>();
|
||||
esCfg.put("es.nodes", clusterMap.get(cluster));
|
||||
esCfg.put("es.mapping.id", idPath);
|
||||
esCfg.put("es.batch.write.retry.count", "8");
|
||||
esCfg.put("es.batch.write.retry.wait", "60s");
|
||||
esCfg.put("es.batch.size.entries", "200");
|
||||
esCfg.put("es.nodes.wan.only", "true");
|
||||
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
||||
}
|
||||
}
|
||||
Map<String, String> esCfg = new HashMap<>();
|
||||
esCfg.put("es.nodes", clusterMap.get(cluster));
|
||||
esCfg.put("es.mapping.id", idPath);
|
||||
esCfg.put("es.batch.write.retry.count", "8");
|
||||
esCfg.put("es.batch.write.retry.wait", "60s");
|
||||
esCfg.put("es.batch.size.entries", "200");
|
||||
esCfg.put("es.nodes.wan.only", "true");
|
||||
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -17,11 +17,11 @@ create table TARGET.result as
|
|||
union all
|
||||
select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
|
||||
union all
|
||||
select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on p.id=rp.project join SOURCE.project_organizations po on po.id=p.id join SOURCE.organization o on o.id=po.organization where rp.id=r.id and o.name in (
|
||||
'GEORG-AUGUST-UNIVERSITAT GOTTINGEN STIFTUNG OFFENTLICHEN RECHTS',
|
||||
'ATHINA-EREVNITIKO KENTRO KAINOTOMIAS STIS TECHNOLOGIES TIS PLIROFORIAS, TON EPIKOINONION KAI TIS GNOSIS',
|
||||
'Consiglio Nazionale delle Ricerche',
|
||||
'Universidade do Minho') )) foo;
|
||||
select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on p.id=rp.project join SOURCE.project_organizations po on po.id=p.id where rp.id=r.id and po.organization in (
|
||||
'openorgs____::759d59f05d77188faee99b7493b46805',
|
||||
'openorgs____::b84450f9864182c67b8611b5593f4250',
|
||||
'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975',
|
||||
'openorgs____::eadc8da90a546e98c03f896661a2e4d4') )) foo;
|
||||
compute stats TARGET.result;
|
||||
|
||||
create table TARGET.result_citations as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
|
|
Loading…
Reference in New Issue