forked from D-Net/dnet-hadoop
Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids
This commit is contained in:
commit
6e987fc084
|
@ -131,18 +131,9 @@ public class HttpConnector2 {
|
|||
}
|
||||
return attemptDownload(newUrl, retryNumber + 1, report);
|
||||
}
|
||||
if (is4xx(urlConn.getResponseCode())) {
|
||||
// CLIENT ERROR, DO NOT RETRY
|
||||
report
|
||||
.put(
|
||||
REPORT_PREFIX + urlConn.getResponseCode(),
|
||||
String
|
||||
.format(
|
||||
"%s error: %s", requestUrl, urlConn.getResponseMessage()));
|
||||
throw new CollectorException("4xx error: request will not be repeated. " + report);
|
||||
}
|
||||
if (is5xx(urlConn.getResponseCode())) {
|
||||
if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) {
|
||||
switch (urlConn.getResponseCode()) {
|
||||
case HttpURLConnection.HTTP_NOT_FOUND:
|
||||
case HttpURLConnection.HTTP_BAD_GATEWAY:
|
||||
case HttpURLConnection.HTTP_UNAVAILABLE:
|
||||
case HttpURLConnection.HTTP_GATEWAY_TIMEOUT:
|
||||
|
|
|
@ -21,6 +21,9 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
|||
|
||||
public class OaiCollectorPlugin implements CollectorPlugin {
|
||||
|
||||
public static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}";
|
||||
public static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z";
|
||||
|
||||
private static final String FORMAT_PARAM = "format";
|
||||
private static final String OAI_SET_PARAM = "set";
|
||||
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
|
||||
|
@ -62,11 +65,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
|
|||
throw new CollectorException("Param 'mdFormat' is null or empty");
|
||||
}
|
||||
|
||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||
if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
|
||||
}
|
||||
|
||||
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||
if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
|
||||
}
|
||||
|
||||
|
|
|
@ -107,10 +107,12 @@ public class OaiIterator implements Iterator<String> {
|
|||
if (set != null && !set.isEmpty()) {
|
||||
url += "&set=" + URLEncoder.encode(set, "UTF-8");
|
||||
}
|
||||
if (fromDate != null && fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
|
||||
}
|
||||
if (untilDate != null && untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
|
||||
}
|
||||
log.info("Start harvesting using url: " + url);
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
// From PROD, 2021-07-06: tf script of HAL, with around 3 million records
|
||||
declare_script "dc_cleaning_OpenAIREplus_compliant_hal";
|
||||
declare_ns oaf = "http://namespace.openaire.eu/oaf";
|
||||
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
|
||||
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
|
||||
declare_ns dc = "http://purl.org/dc/elements/1.1/";
|
||||
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
|
||||
declare_ns oai = "http://www.openarchives.org/OAI/2.0/";
|
||||
declare_ns xs = "http://www.w3.org/2001/XMLSchema";
|
||||
// Empty-string placeholder; assigned in the branch of an if/else rule that should do nothing.
$var0 = "''";
|
||||
// Prefix concatenated in front of the 6-digit grant number extracted from FP7 grantagreement relations.
$varFP7 = "'corda_______::'";
|
||||
// Prefix concatenated in front of the 6-digit grant number extracted from H2020 grantagreement relations.
$varH2020 = "'corda__h2020::'";
|
||||
// Empty-string placeholder; assigned as the do-nothing branch of apply/if rules and also used as an empty output value.
$varDummy = "''";
|
||||
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
|
||||
static $varRepoid = xpath:"//dri:repositoryId";
|
||||
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
|
||||
dri:objIdentifier = xpath:"//dri:objIdentifier";
|
||||
dri:repositoryId = $varRepoid;
|
||||
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
|
||||
//
|
||||
// communities - deactivated until we receive the green light from DARIAH to also mark the community on prod
|
||||
// $varCommunity = xpath:"//*[local-name()='setSpec'][starts-with(., 'collection:DARIAH')]/'dariah'";
|
||||
// concept should not appear with an empty id attribute, i.e. when there is no community - ugly, but seems to work (oaf:datasourceprefix is just some field available in all records)
|
||||
// oaf:concept = set(xpath:"//oaf:datasourceprefix[string-length($varCommunity) gt 0]/''", @id = $varCommunity;);
|
||||
//
|
||||
// apply xpath:"//dc:contributor[starts-with(., 'European Project')]" if xpath:"string-length(replace(., '.*(\d{6,6}).*', '$1')) = 6" oaf:projectid = xpath:"concat($var1, replace(., '.*(\d{6,6}).*', '$1'))"; else $varDummy = "''";
|
||||
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
if xpath:"//dc:title[string-length(.)> 0]" $varDummy = "''"; else dc:coverage = skipRecord();
|
||||
dc:title = xpath:"//dc:title[string-length(.) > 0]/normalize-space(.)";
|
||||
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
dc:contributor = xpath:"//dc:contributor";
|
||||
// dc:description = xpath:"//dc:description/normalize-space(.)";
|
||||
//dc:description = xpath:"string-join(//dc:description/normalize-space(.), concat('; ',codepoints-to-string(10)))";
|
||||
dc:description = xpath:"string-join(//dc:description/normalize-space(.), '; ')";
|
||||
dc:format = xpath:"//dc:format";
|
||||
$varHttpTest = "''";
|
||||
oaf:fulltext = xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))]";
|
||||
//if xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))] or //dc:relation[starts-with(lower-case(normalize-space(.)), 'info:eu-repo/grantagreement')] or //dc:rights[starts-with(lower-case(normalize-space(.)), 'open') or contains(lower-case(normalize-space(.)), 'openaccess')] or //dc:accessRights[contains(lower-case(normalize-space(.)), 'openaccess')]" $var0 = "''"; else dc:coverage = skipRecord();
|
||||
if xpath:"//dc:identifier[starts-with(., 'http')]" $var0 = "''"; else dc:coverage = skipRecord();
|
||||
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
|
||||
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
|
||||
static dr:dateOfTransformation = xpath:"current-dateTime()";
|
||||
dc:type = xpath:"//dc:type";
|
||||
dc:format = xpath:"//dc:format";
|
||||
dc:date = xpath:"//dc:date";
|
||||
dc:language = Convert(xpath:"//dc:language", Languages);
|
||||
// Accepted date: the dc:date values run through the DateISO8601 conversion with pattern yyyy-MM-dd and the "min()" option
// (presumably the earliest date is kept - TODO confirm against the Convert implementation).
$varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
|
||||
// A converted date starting with '0' is replaced by the empty $varDummy value; any other value is emitted unchanged.
if xpath:"starts-with($varDateAccepted, '0')" oaf:dateAccepted = $varDummy; else oaf:dateAccepted = $varDateAccepted;
|
||||
// Embargo end: the YYYY-MM-DD part that follows 'info:eu-repo/date/embargoEnd/' (matched case-insensitively) in dc:date.
$varEmbargoEnd = xpath:"//dc:date[matches(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', 'i')][contains(lower-case(.), 'info:eu-repo')]/replace(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', '$3', 'i')";
|
||||
// The same variable is compared against current-date() by the dc:rights rule further down.
oaf:embargoenddate = $varEmbargoEnd;
|
||||
// FP7
|
||||
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
|
||||
// H2020
|
||||
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2012][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
|
||||
// H2020 workaround for HAL
|
||||
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement//)(\d\d\d\d\d\d)(.*)', 'i')][//dc:contributor[contains(lower-case(.), 'h2020')]][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2012][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement//)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
|
||||
dc:relation = xpath:"//dc:relation";
|
||||
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
|
||||
//
|
||||
oaf:collectedDatasourceid = xpath:"$varDatasourceid";
|
||||
//
|
||||
//if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type) | //oai:setSpec", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies);
|
||||
$varCobjCategoryReverse = Convert(xpath:"insert-before(reverse(//dc:type) , 0, reverse(//oai:setSpec))", TextTypologies);
|
||||
$varSuperTypeReverse = Convert(xpath:"normalize-space($varCobjCategoryReverse)", SuperTypes);
|
||||
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_']/$varCobjCategoryReverse", @type = $varSuperTypeReverse;);
|
||||
$varCobjCategoryStraight = Convert(xpath:"insert-before(//dc:type , 100, //oai:setSpec)", TextTypologies);
|
||||
$varSuperTypeStraight = Convert(xpath:"normalize-space($varCobjCategoryStraight)", SuperTypes);
|
||||
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other'))]/$varCobjCategoryStraight", @type = $varSuperTypeStraight;);
|
||||
//
|
||||
// review level
|
||||
// oaf:refereed = Convert(xpath:"//dc:description", ReviewLevels);
|
||||
$varRefereedConvt = Convert(xpath:"(//dc:type, //oai:setSpec, //dc:description)", ReviewLevels);
|
||||
$varRefereedDesct = xpath:"(//dc:description[matches(lower-case(.), '.*(this\s*book|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*')]/'0001', //dc:description[matches(., '^version\s*(préliminaire.*|0$)')]/'0002')";
|
||||
$varRefereedIdntf = xpath:"(//*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)])pre[\.\-_/\s\(\)]?prints?([\.\-_/\s\(\)].*)?$')][count(//dc:identifier) = 1]/'0002', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)])refereed([\.\-_/\s\(\)\d].*)?$')]/'0001', //*[string(node-name(.)) = 'dc:identifier' and contains(lower-case(.), '-peer-reviewed-article-')]/'0001')";
|
||||
// Merge the review-level codes gathered from the three sources above (vocabulary conversion, identifiers, descriptions) into one sequence.
$varRefereed = xpath:"($varRefereedConvt, $varRefereedIdntf, $varRefereedDesct)";
|
||||
// '0001' takes precedence: it is emitted whenever at least one source produced it.
if xpath:"count(index-of($varRefereed, '0001')) >0" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
|
||||
// '0002' is emitted only when some source produced it and none produced '0001'; otherwise these rules write no oaf:refereed.
if xpath:"count(index-of($varRefereed, '0002')) >0 and count(index-of($varRefereed, '0001')) = 0" oaf:refereed = xpath:"'0002'"; else $varDummy= "''";
|
||||
//
|
||||
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') and (xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
|
||||
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') " oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
|
||||
// 2021-06-01, acz: the next line avoids OPEN being the default and sets UNKNOWN instead; 2021-07-05, acz
|
||||
//if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())]" $var0 = "''"; else oaf:accessrights = "UNKNOWN";
|
||||
oaf:license = xpath:"//dc:rights[starts-with(., 'http') or matches(., '^CC[- ]BY([- ](NC([- ](ND|SA))?|ND|SA))([- ]\d(\.\d)?)?$', 'i')]";
|
||||
//
|
||||
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
||||
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
||||
//
|
||||
//$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
|
||||
$varIdDoi = identifierExtract('["//dc:identifier[starts-with(., \"info:\") or starts-with(., \"urn:\") or starts-with(., \"doi:\") or starts-with(., \"DOI:\") or starts-with(., \"Doi:\") or starts-with(., \"doi \") or starts-with(., \"DOI \") or starts-with(., \"Doi \") or starts-with(., \"10.\") or ((starts-with(., \"http\")) and contains(., \"doi.org/10.\"))]", "//dc:relation[starts-with(., \"info:eu-repo/semantics/altIdentifier/doi/10.\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/DOI/10.\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/Doi/10.\") or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/doi/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/DOI/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/Doi/http\")) and contains(., \"doi.org/10.\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
|
||||
|
||||
$varIdHdl = identifierExtract('["//dc:identifier[starts-with(., \"HDL:\") and not(starts-with(., \"HDL: http\"))][not(contains(., \"123456789\"))]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/hdl/\") or (starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/url/\") and contains(., \"://hdl.handle.net/\"))]"]' , xpath:"./*[local-name()='record']" , '(?!(info:hdl:|://hdl.handle.net/|info:eu-repo/semantics/altIdentifier/hdl/))(\d.*)');
|
||||
|
||||
|
||||
$varIdIsbn = xpath:"(//dc:identifier, //dc:source)[starts-with(lower-case(.), 'isbn') or starts-with(., '978') or starts-with(., '979')][(matches(., '(isbn[:\s]*)?97[89]-\d+-\d+-\d+-\d+$', 'i') and string-length(concat('97', substring-after(., '97'))) = 17) or matches(., '(isbn[:\s]*)?97[89]\d{10}$', 'i')]/replace(., 'isbn[:\s]*', '', 'i'), //dc:relation[starts-with(lower-case(.), 'info:eu-repo/semantics/altidentifier/isbn/')][(matches(., 'info:eu-repo/semantics/altIdentifier/isbn/97[89]-\d+-\d+-\d+-\d+$', 'i') and string-length(.) = 59) or matches(., 'info:eu-repo/semantics/altidentifier/isbn/97[89]\d{10}$', 'i')]/substring-after(lower-case(.), 'info:eu-repo/semantics/altidentifier/isbn/')";
|
||||
|
||||
$varIdBibc = identifierExtract('["//dc:identifier[starts-with(., \"BibCode:\") or starts-with(., \"BIBCODE:\") or (starts-with(., \"http:\") and contains(., \"bibcode=\"))]"]' , xpath:"./*[local-name()='record']" , '(^(BibCode:|BIBCODE:|http).*$)');
|
||||
|
||||
$varIdPtnt = identifierExtract('["//dc:identifier[starts-with(., \"Patent N°:\")]"]' , xpath:"./*[local-name()='record']" , '(^Patent N°:.*$)');
|
||||
|
||||
$varPmId = identifierExtract('["//dc:identifier[starts-with(normalize-space(.), \"PUBMED:\")]"]' , xpath:"./*[local-name()='record']" , '(?!PUBMED: )(\d+)');
|
||||
|
||||
$varIdPmc = identifierExtract('["//dc:identifier[starts-with(., \"PUBMEDCENTRAL:\") or (starts-with(., \"http\") and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\"))]", "//dc:relation[starts-with(., \"info:eu-repo/semantics/altIdentifier/pmid/PMC\") or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/pmid/http\")) and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\"))]"]' , xpath:"./*[local-name()='record']" , '(PMC\d+)');
|
||||
|
||||
//$varIdHal = identifierExtract('["//dc:identifier[starts-with(., \"ads-\") or starts-with(., \"anses-\") or starts-with(., \"artxibo-\") or starts-with(., \"bioemco-\") or starts-with(., \"cea-\") or starts-with(., \"cel-\") or starts-with(., \"cirad-\") or starts-with(., \"edutice-\") or starts-with(., \"emse-\") or starts-with(., \"EMSE-\") or starts-with(., \"ensl-\") or starts-with(., \"hal-\") or starts-with(., \"HAL-\") or starts-with(., \"halsde-\") or starts-with(., \"halshs-\") or starts-with(., \"hprints-\") or starts-with(., \"in2p3-\") or starts-with(., \"ineris-\") or starts-with(., \"inria-\") or starts-with(., \"Inria-\") or starts-with(., \"inserm-\") or starts-with(., \"insu-\") or starts-with(., \"INSU-\") or starts-with(., \"ird-\") or starts-with(., \"irsn-\") or starts-with(., \"jpa-\") or starts-with(., \"lirmm-\") or starts-with(., \"medihal-\") or starts-with(., \"meteo-\") or starts-with(., \"mnhn-\") or starts-with(., \"obspm-\") or starts-with(., \"pastel-\") or starts-with(., \"pasteur-\") or starts-with(., \"Pasteur-\") or starts-with(., \"peer-\") or starts-with(., \"ssa-\") or starts-with(., \"tel-\") or starts-with(., \"ujm-\") or starts-with(., \"ijn_\") or starts-with(., \"sic_\") or (starts-with(., \"http\") and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\") or contains(., \"://medihal.archives-ouvertes.fr/hal\")))]", "//dc:relation[((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\")) and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\")))]"]' , xpath:"./*[local-name()='record']" , 
'((ads|anses|artxibo|bioemco|cea|cel|cirad|edutice|emse|EMSE|ensl|hal|HAL|halsde|halshs|hprints|in2p3|ineris|inria|Inria|inserm|insu|INSU|ird|irsn|jpa|lirmm|medihal|meteo|mnhn|obspm|pastel|pasteur|Pasteur|peer|ssa|tel|ujm)-|(ijn|sic)_).*');
|
||||
$varIdHal = identifierExtract('["//*[local-name() = \"recordIdentifier\"]"]' , xpath:"./*[local-name()='record']" , '(oai:HAL:.*)');
|
||||
|
||||
$varIdArxv = identifierExtract('["//dc:identifier[((starts-with(., \"http\") or starts-with(., \"ArXiv: http\")) and (contains(., \"://arxiv.org/abs/\") or contains(., \"://arxiv.org/pdf/\"))) or starts-with(., \"arXiv:\") or starts-with(., \"ARXIV:\")]", "//dc:relation[(starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/arxiv/\") and not(contains(., \"/arxiv/http\"))) or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/arxiv/http\")) and (contains(., \"://arxiv.org/abs/\") or contains(., \"://arxiv.org/pdf/\")))]"]' , xpath:"./*[local-name()='record']" , '(?!(://arxiv.org/abs/|:eu-repo/semantics/altIdentifier/arxiv/))([a-zA-Z].*)');
|
||||
|
||||
$varIdWos = identifierExtract('["//dc:identifier[starts-with(., \"WOS:\") or starts-with(., \"wos: WOS:\")]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/wos/\")]"]' , xpath:"./*[local-name()='record']" , '(info.*|WOS:.+|wos: WOS:.+)');
|
||||
|
||||
//oaf:identifier = set(xpath:"$varId//value[not[. = '10.1145/nnnnnnn.nnnnnnn']]", @identifierType = "doi";);
|
||||
oaf:identifier = set(xpath:"$varIdDoi//value[not(. = '10.1145/nnnnnnn.nnnnnnn')]", @identifierType = "doi";);
|
||||
oaf:identifier = set(xpath:"$varIdHdl//value", @identifierType = "handle";);
|
||||
oaf:identifier = set(xpath:"$varIdIsbn", @identifierType = "isbn";);
|
||||
|
||||
oaf:identifier = set(xpath:"($varIdBibc//value[not(starts-with(., 'http'))]/replace(., 'BIBCODE:\s*', ''), $varIdBibc//value[starts-with(., 'http') and contains(substring-after(., 'bibcode='), codepoints-to-string(38))]/substring-before(substring-after(., 'bibcode='), codepoints-to-string(38)), $varIdBibc//value[starts-with(., 'http') and not(contains(substring-after(., 'bibcode='), codepoints-to-string(38)))]/substring-after(., 'bibcode='))", @identifierType = "bibcode";);
|
||||
|
||||
oaf:identifier = set(xpath:"$varIdPtnt//value/normalize-space(substring-after(., 'Patent N°:'))", @identifierType = "patentNumber";);
|
||||
|
||||
oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";);
|
||||
oaf:identifier = set(xpath:"$varIdPmc//value", @identifierType = "pmcid";);
|
||||
//oaf:identifier = set(xpath:"distinct-values($varIdHal//value/replace(., '(/document|/image|/file/.*)$', ''))", @identifierType = "hal";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdHal//value/replace(substring-after(., 'oai:HAL:'), '(v\d*)$', ''))", @identifierType = "hal";);
|
||||
oaf:identifier = set(xpath:"distinct-values(($varIdArxv//value/normalize-space(replace(., '(https?://arxiv.org/abs/|https?://arxiv.org/pdf/|info:eu-repo/semantics/altIdentifier/arxiv/|info:eu-repo/semantics/altIdentifier/url/|info:eu-repo/semantics/altIdentifier/urn/|arXiv:|\.pdf)', '', 'i'))))", @identifierType = "arxiv";);
|
||||
oaf:identifier = set(xpath:"$varIdWos//value/normalize-space(replace(., '(info:eu-repo/semantics/altIdentifier/wos/|WOS:|wos:)', ''))", @identifierType = "wos";);
|
||||
|
||||
oaf:identifier = set(xpath:"distinct-values(//dc:identifier[starts-with(., 'http') and contains(., $varIdHal//value/replace(substring-after(., 'oai:HAL:'), '(v\d*)$', ''))]/replace(., '(/document|/image|/file/.*)$', ''))", @identifierType = "landingPage";);
|
||||
oaf:identifier = set(xpath:"distinct-values(//dc:identifier[starts-with(., 'http') and not(ends-with(., $varIdHal//value/replace(substring-after(., 'oai:HAL:'), '(v\d*)$', '')))])", @identifierType = "url";);
|
||||
|
||||
oaf:identifier = set(xpath:"//dri:recordIdentifier", @identifierType = "oai-original";);
|
||||
|
||||
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
|
||||
|
||||
// journal data
|
||||
// avoiding regular expressions, while a) correcting ISSNs with no '-' or with other letters in place of '-' and b) ignoring anything after the ISSN (e.g. print/online/...)
|
||||
$varISSN = xpath:"//dc:source[starts-with(., 'ISSN:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'ISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'ISSN:')), 1, 4))))";
|
||||
//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'ISSN:'))";
|
||||
$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/concat(substring(normalize-space(substring-after(., 'EISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'EISSN:')), 1, 4))))";
|
||||
oaf:journal = set(xpath:"//oaf:datasourceprefix[$varISSN or $varEISSN]/''", @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";);
|
||||
|
||||
end
|
|
@ -0,0 +1,140 @@
|
|||
// From PROD, 2021-07-06: tf script of DOAJ, with more than 6 million records
|
||||
declare_script "dc_cleaning_OpenAIREplus_compliant_doaj";
|
||||
declare_ns oaf = "http://namespace.openaire.eu/oaf";
|
||||
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
|
||||
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
|
||||
declare_ns dc = "http://purl.org/dc/elements/1.1/";
|
||||
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
|
||||
$var0 = "''";
|
||||
$varFP7 = "'corda_______::'";
|
||||
$varH2020 = "'corda__h2020::'";
|
||||
$varDummy = "''";
|
||||
// $varUnknownRepoId = "'openaire____::55045bd2a65019fd8e6741a755395c8c'";
|
||||
//
|
||||
$varUnknownRepoId = "'openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18'";
|
||||
$varUnknownRepoName = "'Unknown Repository'";
|
||||
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
|
||||
static $varRepoid = xpath:"//dri:repositoryId";
|
||||
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&apos;/db/DRIVER/RepositoryServiceResources&apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&quot;NamespacePrefix&quot;][value=&quot;', //oaf:datasourceprefix, '&quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
|
||||
dri:objIdentifier = xpath:"//dri:objIdentifier";
|
||||
dri:repositoryId = $varRepoid;
|
||||
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
|
||||
|
||||
if xpath:"//dc:creator[string-length(normalize-space(.)) &gt; 0][contains(., 'CDATA')][starts-with(normalize-space(.), '(')][starts-with(normalize-space(.), '.')]" dc:creator = skipRecord(); else $varDummy = "''";
|
||||
//apply xpath:"//dc:creator" if xpath:"string-length(normalize-space(.)) &amp;gt; 0 and not(contains(., 'CDATA')) and not(starts-with(normalize-space(.), '.')) and not(starts-with(normalize-space(.), '('))" dc:creator = Convert(xpath:".", Person); else $varDummy = "''";
|
||||
if xpath:"count(//dc:creator) = 0" dc:creator = skipRecord(); else $varDummy = "''";
|
||||
//apply xpath:"//dc:creator" if xpath:"string-length(.) &gt; 0 and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
$varOrcidName = xpath:"//dc:creator[string-length(normalize-space(.)) > 0]";
|
||||
$varOrcidOrcid = xpath:"//dc:creator[string-length(normalize-space(.)) > 0]/@id/replace(., 'https?://orcid.org/', '')";
|
||||
dc:creator = set(xpath:"$varOrcidName", @nameIdentifier = xpath:"subsequence($varOrcidOrcid,position(),1)";, @nameIdentifierScheme=xpath:"replace(subsequence($varOrcidOrcid,position(),1),'^.+$','ORCID')";, @schemeUri=xpath:"replace(subsequence($varOrcidOrcid,position(),1),'^.+$','http://orcid.org/')";);
|
||||
|
||||
if xpath:"count(//dc:title[string-length(.) &gt; 0]) = 0" dc:title = skipRecord(); else $varDummy = "''";
|
||||
dc:title = xpath:"//dc:title/normalize-space(replace(., '^(&lt;title language=)(.)*(&gt;)', ''))";
|
||||
// apply xpath:"//dc:title" if xpath:"string-length(.) &gt; 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
|
||||
apply xpath:"//dc:subject" if xpath:"string-length(.) &gt; 0 and not(@xsi:type = 'dcterms:LCSH')" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
dc:subject = set(xpath:"//dc:subject[@xsi:type = 'dcterms:LCSH']/concat('lcsh:', .)", @classid=xpath:"'lcsh'";, @classname=xpath:"'lcsh'";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
|
||||
|
||||
apply xpath:"//dc:publisher" if xpath:"string-length(.) &gt; 0" dc:publisher = xpath:"normalize-space(replace(., '(&lt;br&gt;)', ''))"; else $varDummy = "''";
|
||||
apply xpath:"//dc:source" if xpath:"string-length(.) &gt; 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
|
||||
dc:contributor = xpath:"//dc:contributor";
|
||||
dc:description = xpath:"//dc:description[not(starts-with(., 'URN: urn:nbn:') or starts-with(., 'URN: http'))]";
|
||||
dc:format = xpath:"//dc:format";
|
||||
$varHttpTest = "''";
|
||||
if xpath:"//dc:relation[starts-with(., 'http') or starts-with(., 'www.')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
|
||||
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
|
||||
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'www.')" dc:identifier = xpath:"concat('http://', normalize-space(.))"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
|
||||
dr:CobjIdentifier = xpath:"distinct-values(//dc:identifier[not(starts-with(normalize-space(.), 'http'))][not(normalize-space(.) = ($varIdList))][not(starts-with(normalize-space(.), 'urn:nbn:') or starts-with(normalize-space(.), 'URN:NBN:'))][not(. = ($varISSN[1], $varISSN[2]))][normalize-space(.) != ''])";
|
||||
dc:identifier = xpath:"($varIdUrl//value[not(starts-with(., 'www'))], $varIdUrl//value[starts-with(., 'www')]/concat('http://', .), $varIdLdpg//value, $varIdDoi//value)[1]";
|
||||
dc:relation = xpath:"//dc:relation[starts-with(., 'https://doaj.org/toc/')]";
|
||||
|
||||
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
|
||||
static dr:dateOfTransformation = xpath:"current-dateTime()";
|
||||
// dc:type = xpath:"//dc:type";
|
||||
dc:language = Convert(xpath:"//dc:language", Languages);
|
||||
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
|
||||
dc:date = xpath:"//dc:date";
|
||||
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
|
||||
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
|
||||
//apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
|
||||
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
|
||||
//
|
||||
oaf:collectedDatasourceid = $varDatasourceid;
|
||||
//
|
||||
// apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
|
||||
//dr:CobjCategory = "0001";
|
||||
$varCobjCategory = Convert(xpath:"//dc:type", TextTypologies);
|
||||
$varSuperType = Convert(xpath:"normalize-space($varCobjCategory)", SuperTypes);
|
||||
dr:CobjCategory = set($varCobjCategory, @type = $varSuperType;);
|
||||
dc:type = xpath:"//dc:type";
|
||||
//
|
||||
// review status
|
||||
|
||||
$varRefereedIdntf = xpath:"(//*[string(node-name(.)) = 'dc:identifier' and matches(., '^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$')]/'0001', //*[string(node-name(.)) = 'dc:relation' and matches(., '^info:eu-repo/semantics/altIdentifier/doi/10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$', 'i')]/'0001')";
|
||||
|
||||
$varRefereedProse = xpath:"(//*[string(node-name(.)) = 'dc:description' and matches(lower-case(.), '.*this\s*preprint\s*has\s*been\s*reviewed\s*and\s*recommended\s*by\s*peer\s*community') and contains(., '10.24072/')]/'0001', //dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001')";
|
||||
$varRefereedReltn = xpath:"(//dc:relation, //dc:identifier)[contains(., '://www.dovepress.com/') and matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001'";
|
||||
$varRefereedTitle = xpath:"//dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001'";
|
||||
$varRefereedDesct = xpath:"(//dc:description[matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001')";
|
||||
$varRefereed = xpath:"($varRefereedIdntf, $varRefereedProse, $varRefereedReltn, $varRefereedTitle, $varRefereedDesct)";
|
||||
//if xpath:"$varRefereed" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
|
||||
if xpath:"count(index-of($varRefereed, '0001')) >0" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
|
||||
if xpath:"count(index-of($varRefereed, '0002')) >0 and count(index-of($varRefereed, '0001')) = 0" oaf:refereed = xpath:"'0002'"; else $varDummy= "''";
|
||||
//
|
||||
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
|
||||
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
|
||||
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
|
||||
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
|
||||
oaf:license = xpath:"(//dc:rights, //dc:relation)[starts-with(normalize-space(.), 'http') and (contains(., '/licenses/') or contains(., '/licence/') or contains(., '/licencias/') or contains(., '/licencia/') or contains(., '://creativecommons.org/') or contains(., '://rightsstatements.org/')) or matches(., '^CC[- ]BY([- ](NC([- ](ND|SA))?|ND|SA))([- ]\d(\.\d)?)?$', 'i')][not(contains(normalize-space(.), ' '))]/normalize-space(.)";
|
||||
//
|
||||
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
||||
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
|
||||
//
|
||||
//$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
|
||||
$varIdDoi = identifierExtract('["//dc:identifier[starts-with(., \"10.\") or starts-with(., \"DOI:\") or starts-with(., \"doi:\") or (starts-with(., \"http\") and contains(., \"doi.org/\"))]", "//dc:relation[starts-with(., \"10.\") or starts-with(., \"DOI:\") or starts-with(., \"doi:\") or (starts-with(., \"http\") and contains(., \"doi.org/\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
|
||||
$varIdHdl = identifierExtract('["//dc:relation[starts-with(., \"http\") and contains(., \"://hdl.handle.net/\")][not(contains(., \"123456789\"))]"]' , xpath:"./*[local-name()='record']" , '(?!(://hdl.handle.net/))(\d.*)');
|
||||
$varIdUrn = identifierExtract('["//dc:relation[starts-with(., \"urn:nbn:\") or starts-with(., \"URN:NBN:\") or (starts-with(., \"http\") and (contains(., \"://nbn-resolving.org/urn:nbn:\") or contains(., \"://nbn-resolving.de/urn/resolver.pl?urn:nbn:\") or contains(., \"://nbn-resolving.de/urn:nbn:\") or contains(., \"://resolver.obvsg.at/urn:nbn:\") or contains(., \"://urn.fi/URN:NBN:\") or contains(., \"://urn.kb.se/resolve?urn=urn:nbn:\")))]", "//dc:description[contains(., \"URN: urn:nbn:de:0114-\") or contains(., \"URN: http://nbn-resolving.de/urn:nbn:de:0114-\") or (contains(., \"URN:NBN:no-\") and //dc:identifier = \"1893-1774\")]"]' , xpath:"./*[local-name()='record']" , '((urn:nbn:|URN:NBN:).*)');
|
||||
$varIdArk = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"http\") and contains(., \"/ark:\")]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
|
||||
$varIdPmid = identifierExtract('["//dc:relation[starts-with(., \"http\") and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
|
||||
$varIdPmc = identifierExtract('["//dc:relation[starts-with(., \"http\") and (contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\") or contains(., \"//europepmc.org/articles/PMC\"))]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
|
||||
$varIdHal = identifierExtract('["//dc:relation[starts-with(., \"hal-\") or starts-with(., \"halshs-\") or starts-with(., \"halsde-\") or (starts-with(., \"http\") and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\")))]"]' , xpath:"./*[local-name()='record']" , '(hal(shs|sde)?-.*)');
|
||||
$varIdArxv = identifierExtract('["//dc:relation[starts-with(., \"http\") and (contains(., \"://arxiv.org/pdf/\") or contains(., \"://arxiv.org/abs/\"))]"]' , xpath:"./*[local-name()='record']" , '(\d.*)');
|
||||
$varIdLdpg = identifierExtract('["//dc:identifier[starts-with(., \"https://doaj.org/article/\")]"]', xpath:"./*[local-name()='record']" , '(http.*)');
|
||||
$varIdUrl = identifierExtract('["//dc:relation[starts-with(., \"http\")][not(contains(., \"://doaj.org\"))][not(contains(., \"doi.org/\"))][not(contains(., \"hdl.handle.net/\"))][not(contains(., \"://nbn-resolving.de/\") or contains(., \"://nbn-resolving.org/\") or contains(., \"://resolver.obvsg.at/\") or contains(., \"://urn.fi/URN:NBN:\") or contains(., \"://urn.kb.se/resolve\"))][not(contains(., \"://arxiv.org/pdf/\") or contains(., \"://arxiv.org/abs/\"))][not(contains(., \"://localhost/\") or contains(., \"://localhost:\"))]", "//dc:relation[starts-with(., \"www\")]"]', xpath:"./*[local-name()='record']" , '((http|www).*)');
|
||||
|
||||
$varIdList = xpath:"(($varIdDoi//value, $varIdHdl//value, $varIdUrn//value, $varIdArk//value, $varIdPmid//value, $varIdPmc//value, $varIdLdpg//value, $varIdUrl//value))";
|
||||
|
||||
// dropping/cleaning wrong DOIs, as
|
||||
// 2 DOIs just different in 1 ending with . (mostly, but not exclusively, prefixed with 10.5216)
|
||||
// noise stemming from odd/wrong DOI statements' formats
|
||||
// DOIs with 2 prefixes
|
||||
// DOI statements containing first the DOI prefix and then the DOI incl. the resolver prefix
|
||||
//oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
|
||||
//oaf:identifier = set(xpath:"$varIdDoi//value", @identifierType = "doi";);
|
||||
|
||||
oaf:identifier = set(xpath:"distinct-values(($varIdDoi//value[not(ends-with(., '.') and exists(index-of($varIdDoi//value, substring(., 1, string-length(.)-1))))][not(. = '10.4313/article-4')][not(lower-case(.) = ('10.30659/ijibe.2.1.171-181', '10.30659/ijibe.2.1.171', '10.26843/rencima.v8i4.149', '10.26843/rencima.v11i1.215', '10.18273/revfue.v14n2-2016002revista', '10.17061/phrp3112015', '10.21789/24222704', '10.22432/pjsr.2017.14.', '10.22432/pjsr.2017.18.02', '10.22432/pjsr.2017.18.'))][not(starts-with(., '10.1530/VAB-'))][not(starts-with(lower-case(.), '10.1155/s168761720'))][not(starts-with(., '10.15561/10.6084/') or starts-with(., '10.5935/10.19180/'))][not(starts-with(., '10.7454/jvi.v') and string-length(.) = 16)][not(starts-with(., '10.15094/0000') and string-length(.) = 16)][not(matches(., '^10\.\d*/DOI:$'))][not(starts-with(., concat(substring-before(., '/'), '/', substring-before(., '/'), '/')))][not(matches(substring-after(., '/'), '^https?://(dx.)?doi.org/.*') and starts-with(substring-after(., 'doi.org/'), substring-before(., '/')))][not(starts-with(., '10.1371/journal.') and matches(., '^10\.1371/journal\.[a-z]{4}\.\d{7}\.(eor|20050521)$'))][not(substring-before(., '/') = ('10.19183', '10.18066') and matches(., '^(10\.19183/how\.\d*\.\d*|10\.18066/revunivap\.v\d*i\d*)$'))]/lower-case(.), $varIdDoi//value[matches(substring-after(., '/'), '^https?://(dx.)?doi.org/.*') and starts-with(substring-after(., 'doi.org/'), substring-before(., '/'))]/substring-after(., 'doi.org/'), $varIdDoi//value[starts-with(., '10.1371/journal.') and matches(., '^10\.1371/journal\.[a-z]{4}\.\d{7}\.eor$')]/substring(., 1, 28), $varIdDoi//value[starts-with(., '10.15561/10.6084/') or starts-with(., '10.5935/10.19180/')]/substring-after(., '/')))", @identifierType = "doi";);
|
||||
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdHdl//value/normalize-space(replace(., '\?locatt=view:master', '')))", @identifierType = "handle";);
|
||||
oaf:identifier = set(xpath:"$varIdUrn//value", @identifierType = "urn";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdArk//value/replace(substring-after(., '/ark:'), '^/', ''))", @identifierType = "ark";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdPmid//value/replace(., 'https?://www.ncbi.nlm.nih.gov/pmc/articles/pmid/(\d+)(/.*)?', '$1'))", @identifierType = "pmid";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdPmc//value/replace(., 'https?://(www.ncbi.nlm.nih.gov/pmc|europepmc.org)/articles/(PMC\d*)([/\?].*)?', '$2'))", @identifierType = "pmcid";);
|
||||
oaf:identifier = set(xpath:"distinct-values($varIdHal//value/replace(., '/document', ''))", @identifierType = "hal";);
|
||||
oaf:identifier = set(xpath:"$varIdArxv//value", @identifierType = "arxiv";);
|
||||
oaf:identifier = set(xpath:"$varIdLdpg//value", @identifierType = "landingPage";);
|
||||
oaf:identifier = set(xpath:"($varIdUrl//value[not(starts-with(., 'www'))], $varIdUrl//value[starts-with(., 'www')]/concat('http://', .))", @identifierType = "url";);
|
||||
|
||||
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
|
||||
|
||||
//$varJournalName = xpath:"substring-before(//dc:source, ',')";
|
||||
$varJournalTitle = xpath:"(//dc:source[contains(., ', Vol ')]/substring-before(., ', Vol '), //dc:source[contains(., ', Iss ')]/substring-before(., ', Iss '))[1]";
|
||||
$varVol = xpath:"//dc:source[contains(., ', Vol ')][matches(., ', Vol \d+')]/replace(substring-after(., ', Vol '), '^(\d+).*$', '$1')";
|
||||
$varIss = xpath:"//dc:source[contains(., ', Iss ')][matches(., ', Iss \d+')]/replace(substring-after(., ', Iss '), '^(\d+).*$', '$1')";
|
||||
$varSp = xpath:"//dc:source[contains(., ', Pp ')][matches(., ', Pp \d+-\d+')]/substring-before(substring-after(., ', Pp '), '-')";
|
||||
$varEp = xpath:"//dc:source[contains(., ', Pp ')][matches(., ', Pp \d+-\d+')]/replace(substring-after(substring-after(., ', Pp '), '-'), '^(\d+).*$', '$1')";
|
||||
// first dc:identifier that is a 9-character ISSN; the final position may be the check character 'X', and the pattern is anchored to the whole value
$varISSN = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)$')][1]";
|
||||
//oaf:journal = set($varJournalName, @issn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)')][1]"; , @eissn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)')][2]";);
|
||||
//oaf:journal = set($varJournalName, @issn = xpath:"//dc:identifier[string-length() = 9]";);
|
||||
// journal element: title, volume, issue and page range come from the dc:source-derived variables; issn / eissn are the first / second dc:identifier that is a 9-character ISSN (final position may be the check character 'X', pattern anchored to the whole value)
oaf:journal = set($varJournalTitle, @issn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)$')][1]";, @eissn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)$')][2]";, @vol = xpath:"$varVol";, @iss = xpath:"$varIss";, @sp = xpath:"$varSp";, @ep = xpath:"$varEp";);
|
||||
|
||||
end
|
|
@ -0,0 +1,492 @@
|
|||
<!-- from PROD 2021-06-14 -->
|
||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.1"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:transformExt="http://namespace.openaire.eu/java/org.apache.commons.codec.digest.DigestUtils"
|
||||
xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy"
|
||||
extension-element-prefixes="transformExt TransformationFunction"
|
||||
exclude-result-prefixes="transformExt TransformationFunction" >
|
||||
<xsl:output indent="yes" omit-xml-declaration="yes"/>
|
||||
|
||||
<!--
|
||||
<xsl:param name="varHostedById" select="'opendoar____::908'"/>
|
||||
<xsl:param name="varHostedByName" select="'Europe PubMed Central'"/>
|
||||
-->
|
||||
|
||||
<!-- Datasource-level parameters, declared without a select default; presumably supplied by the caller when the stylesheet is run (TODO confirm). -->
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDsType" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<!-- Funder DOIs that are matched with ends-with() against institution-id inside award-group to recognise FP7 and H2020 funding. -->
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'"/>
|
||||
<xsl:param name="varFP7OtherDOI" select="'10.13039/100011102'"/>
|
||||
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'"/>
|
||||
<!-- Prefixes that are concatenated with a six-digit award-id to build the oaf:projectid value. -->
<xsl:param name="varFP7" select="'corda_______::'"/>
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'"/>
|
||||
<!-- NOTE(review): epmcUrlPrefix is not referenced in the part of the stylesheet visible here; confirm it is still needed. -->
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
|
||||
<!-- repoCode: the part of header/recordIdentifier that precedes the first ':'. -->
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
|
||||
|
||||
<!-- index defaults to 0; transDate defaults to current-dateTime(). -->
<xsl:param name="index" select="0"/>
|
||||
<xsl:param name="transDate" select="current-dateTime()"/>
|
||||
<!-- tf: instance of the TransformationFunction extension; it is passed as the first argument to the TransformationFunction:convertString calls further down. -->
<xsl:variable name="tf" select="TransformationFunction:getInstance()"/>
|
||||
<!-- year / month / day: read from the pub-date flagged pub-type='epub' or (date-type='pub' and publication-format='electronic') and formatted with format-number as '0000' resp. '00'.
     NOTE(review): the union can select more than one node when both pub-date variants are present; confirm format-number copes with that in the processor used. -->
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
|
||||
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
|
||||
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
|
||||
|
||||
<!-- Named template "terminate": emits an xsl:message with terminate="yes", which stops the transformation.
     The root template calls it when article-meta contains no article-title with non-blank text. -->
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
<metadata>
|
||||
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
|
||||
<xsl:call-template name="terminate"/>
|
||||
</xsl:if>
|
||||
<!-- in journal.fi xml:lang of translated titles is not within the trans-title element but within the surrounding trans-title-group element (which just contains 1 trans-title element) -->
|
||||
<!--
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()=('article-title', 'trans-title-group')][string-length(normalize-space(.))> 0]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:title'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="title">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name()='title-group']//*[local-name()=('article-title', 'trans-title', 'subtitle', 'trans-subtitle')]"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="authors">
|
||||
<!--
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))]"/>
|
||||
-->
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group'][@content-type='author']/*[local-name() = 'contrib']"/>
|
||||
</xsl:call-template>
|
||||
<!-- <xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()=('abstract', 'trans-abstract')]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:description'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group']//*[local-name()='kwd']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:source'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:element name="dc:language">
|
||||
<xsl:value-of select="//*[local-name()='metadata']//*[local-name()='article']/@xml:lang" />
|
||||
</xsl:element>
|
||||
<xsl:element name="dc:identifier">
|
||||
<xsl:value-of select="//*[local-name()='article-meta']/*[local-name()='self-uri'][contains(./@xlink:href, '/view/')]/@xlink:href" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:dateAccepted">
|
||||
<!--
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])" />
|
||||
|
||||
<xsl:value-of select="TransformationFunction:Convert($tf, //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub'], 'DateISO8601', 'yyyy-MM-dd', 'min()')" />
|
||||
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/replace(concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day']), '-(\d)([-$])', '-0$1$2')" />
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
|
||||
concat(./*[local-name()='year'], '-',
|
||||
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']), 2), '-',
|
||||
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']), 2))" />
|
||||
-->
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
|
||||
concat(./*[local-name()='year'], '-',
|
||||
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']) idiv 2 + 1, 2), '-',
|
||||
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']) idiv 2 +1, 2))" />
|
||||
|
||||
</xsl:element>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<xsl:for-each select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub']">
|
||||
<xsl:choose>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2] and ./*[local-name()='day' and string-length(normalize-space(.)) = 2]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'])"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="./*[local-name()='year']"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()='meta-value'], //*[local-name()='permissions']/*[local-name()='copyright-statement']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='permissions']/*[local-name()='license']/@xlink:href"/>
|
||||
<xsl:with-param name="targetElement" select="'oaf:license'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="identifiers">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri'][not(./@content-type = 'application/pdf')]/@xlink:href">
|
||||
<oaf:identifier>
|
||||
<xsl:attribute name="identifierType">
|
||||
<xsl:text>landingPage</xsl:text>
|
||||
</xsl:attribute>
|
||||
<xsl:value-of select="."/>
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri' and ./@content-type='application/pdf' and //oaf:datasourceprefix = ('ambientesust', 'qualityinedu')]/@xlink:href/replace(., '/view/', '/download/')">
|
||||
<oaf:fulltext>
|
||||
<xsl:value-of select="."/>
|
||||
</oaf:fulltext>
|
||||
</xsl:for-each>
|
||||
|
||||
<!-- FP7 funding: for every award-group whose institution-id ends with one of the FP7 funder DOIs,
     emit one oaf:projectid per award-id that consists of exactly six digits after whitespace normalisation.
     Iterating over the validated award-id elements guarantees that the emitted value is the one that
     passed the check (and is trimmed), and avoids handing concat() several award-id nodes at once. -->
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI) or ends-with(., $varFP7OtherDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '^\d{6}$')]">
  <oaf:projectid>
    <xsl:value-of select="concat($varFP7, normalize-space(.))"/>
  </oaf:projectid>
</xsl:for-each>
|
||||
<!-- H2020 funding: for every award-group whose institution-id ends with the H2020 funder DOI,
     emit one oaf:projectid per award-id that consists of exactly six digits after whitespace normalisation.
     Iterating over the validated award-id elements guarantees that the emitted value is the one that
     passed the check (and is trimmed), and avoids handing concat() several award-id nodes at once. -->
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '^\d{6}$')]">
  <oaf:projectid>
    <xsl:value-of select="concat($varH2020, normalize-space(.))"/>
  </oaf:projectid>
</xsl:for-each>
|
||||
|
||||
<!-- -->
|
||||
<xsl:variable name='varRights' select="distinct-values((for $i in (
|
||||
//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href,
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
|
||||
and not( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
|
||||
and not( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())])]/'open',
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
|
||||
and (( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
|
||||
or ( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())]))]/'embargo')
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'AccessRights')))" />
|
||||
|
||||
<!--
|
||||
and not((xs:date( max( (start_date, '0001-01-01') ) ) gt current-date()))
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read' and and not((xs:date( max( (./@start_date, '0001-01-01') ) ) gt current-date()))]/'open'
|
||||
-->
|
||||
|
||||
<oaf:accessrights>
|
||||
<xsl:choose>
|
||||
<xsl:when test="$varRights[. = 'EMBARGO']">
|
||||
<xsl:value-of select="'EMBARGO'"/>
|
||||
</xsl:when>
|
||||
<xsl:when test="$varRights[. != 'UNKNOWN']">
|
||||
<xsl:value-of select="$varRights[. != 'UNKNOWN'][1]"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="$varRights[1]"/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:accessrights>
|
||||
|
||||
<!--
|
||||
<oaf:accessrights>
|
||||
<xsl:value-of select="$varRights[1]"/>
|
||||
</oaf:accessrights>
|
||||
|
||||
<xsl:element name="oaf:accessrights">
|
||||
<xsl:value-of select="(//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href)/TransformationFunction:convertString($tf, ., 'AccessRights')" />
|
||||
</xsl:element>
|
||||
-->
|
||||
|
||||
<!--
|
||||
<xsl:element name="dr:CobjCategory">
|
||||
<xsl:variable name='varCobjCategory' select="TransformationFunction:convertString($tf, //*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()='meta-value'], 'TextTypologies')" />
|
||||
<xsl:variable name='varSuperType' select="TransformationFunction:convertString($tf, $varCobjCategory, 'SuperTypes')" />
|
||||
<xsl:attribute name="type" select="$varSuperType"/>
|
||||
<xsl:value-of select="$varCobjCategory" />
|
||||
</xsl:element>
|
||||
|
||||
<xsl:variable name='varCobjCatLst' select="for $i in (
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')" />
|
||||
-->
|
||||
|
||||
<xsl:variable name='varTypLst' select="distinct-values((//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type))"/>
|
||||
<xsl:variable name='varCobjCatLst' select="distinct-values((for $i in $varTypLst
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')))" />
|
||||
<xsl:variable name='varCobjSupLst' select="for $i in $varCobjCatLst
|
||||
return concat($i, '###', TransformationFunction:convertString($tf, normalize-space($i), 'SuperTypes'))" />
|
||||
<dr:CobjCategory>
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other')]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other')][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0000'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0000'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:attribute name="type" select="'other'"/>
|
||||
<xsl:value-of select="'0000'" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</dr:CobjCategory>
|
||||
|
||||
<!--
|
||||
<xsl:for-each select="$varCobjSupLst">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
-->
|
||||
|
||||
<xsl:for-each select="$varTypLst">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
|
||||
<!--
|
||||
<xsl:for-each select="(//*[local-name()='article']/@article-type, //*[local-name() = 'custom-meta' and ./@specific-use = 'resource-type']/*[local-name() = ('meta-value', 'meta-name')])">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
-->
|
||||
|
||||
<oaf:language>
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, //*[local-name()='metadata']//*[local-name()='article']/@xml:lang, 'Languages')" />
|
||||
</oaf:language>
|
||||
|
||||
<!-- review status -->
|
||||
<!-- ToDo:
|
||||
review status
|
||||
~ ask Journal.fi to put it elsewhere
|
||||
~ evaluate article-version (no example found yet)
|
||||
subject/kwd:
|
||||
~ handle thesauri (no example found yet)
|
||||
relations:
|
||||
~ handle fn (no example found yet)
|
||||
-->
|
||||
<!--
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in (
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
-->
|
||||
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in ($varTypLst)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
<xsl:variable name="varRefereedDescp" select="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]/'0001'"/>
|
||||
<xsl:variable name="varRefereedSubjt" select="//*[local-name() = 'article-categories' and contains(//dri:recordIdentifier, 'oai:journal.fi')]/*[local-name() = 'subj-group' and ./@subj-group-type='heading']/*[local-name() = 'subject' and . = 'Peer reviewed articles']/'0001'"/>
|
||||
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedDescp, $varRefereedSubjt)"/>
|
||||
<!--
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="$varRefereedDescp"/>
|
||||
</oaf:refereed>
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="$varRefereed"/>
|
||||
</oaf:refereed>
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="count($varRefereed[. = '0001']) > 0"/>
|
||||
</oaf:refereed>
|
||||
-->
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereed[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereed[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
|
||||
<xsl:call-template name="journal">
|
||||
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']"/>
|
||||
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']"/>
|
||||
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']"/>
|
||||
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']"/>
|
||||
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']"/>
|
||||
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']"/>
|
||||
</xsl:call-template>
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
|
||||
</xsl:template>
|
||||
|
||||
<!--
    allElements: for every item of $sourceElement, emits one element whose
    name is given by $targetElement and whose content is the
    whitespace-normalised string value of the item.
    For dc:title, dc:description and dc:subject the xml:lang attribute of the
    item (or, failing that, of its parent) is carried over to the new element.
-->
<xsl:template name="allElements">
    <xsl:param name="sourceElement"/>
    <xsl:param name="targetElement"/>
    <xsl:for-each select="$sourceElement">
        <xsl:element name="{$targetElement}">
            <!-- language is only propagated for the three textual DC fields -->
            <xsl:if test="(@xml:lang or ../@xml:lang) and $targetElement = ('dc:title', 'dc:description', 'dc:subject')">
                <xsl:attribute name="xml:lang" select="(@xml:lang, ../@xml:lang)[1]"/>
            </xsl:if>
            <xsl:value-of select="normalize-space(.)"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!--
    title: emits one dc:title per item of $sourceElement.
    The text is the item itself followed by any following-sibling
    subtitle / trans-subtitle elements, each whitespace-normalised and joined
    with ': '. The xml:lang of the item (or of its parent) is copied when present.
-->
<xsl:template name="title">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <xsl:element name="dc:title">
            <xsl:if test="@xml:lang or ../@xml:lang">
                <xsl:attribute name="xml:lang" select="(@xml:lang, ../@xml:lang)[1]"/>
            </xsl:if>
            <xsl:value-of select="string-join(for $part in (., following-sibling::*[local-name() = ('subtitle', 'trans-subtitle')]) return normalize-space($part), ': ')"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!--
    journal: builds a single oaf:journal element.
    Each parameter is whitespace-normalised; issn, eissn, vol, issue, sp and ep
    become the attributes issn, eissn, vol, iss, sp and ep, and the journal
    title becomes the element text. Attributes are always written, even when
    the corresponding parameter is empty.
-->
<xsl:template name="journal">
    <xsl:param name="journalTitle"/>
    <xsl:param name="issn"/>
    <xsl:param name="eissn"/>
    <xsl:param name="vol"/>
    <xsl:param name="issue"/>
    <xsl:param name="sp"/>
    <xsl:param name="ep"/>
    <xsl:element name="oaf:journal">
        <xsl:attribute name="issn" select="normalize-space($issn)"/>
        <xsl:attribute name="eissn" select="normalize-space($eissn)"/>
        <xsl:attribute name="vol" select="normalize-space($vol)"/>
        <!-- note: the parameter is called 'issue', the attribute 'iss' -->
        <xsl:attribute name="iss" select="normalize-space($issue)"/>
        <xsl:attribute name="sp" select="normalize-space($sp)"/>
        <xsl:attribute name="ep" select="normalize-space($ep)"/>
        <xsl:value-of select="normalize-space($journalTitle)"/>
    </xsl:element>
</xsl:template>
|
||||
|
||||
|
||||
<!--
    identifiers: emits an oaf:identifier of type 'doi' when $sourceElement
    contains an item with pub-id-type='doi' whose string value is non-empty.
    Nothing is written otherwise.
-->
<xsl:template name="identifiers">
    <xsl:param name="sourceElement"/>
    <xsl:variable name="varDoi" select="$sourceElement[@pub-id-type='doi']"/>
    <xsl:if test="string-length($varDoi) gt 0">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType" select="'doi'"/>
            <xsl:value-of select="$varDoi"/>
        </xsl:element>
    </xsl:if>
</xsl:template>
|
||||
|
||||
|
||||
<!--
    authors: emits one dc:creator per item of $sourceElement, with the text
    "surname, given-names" taken from the item's name child.
    When the item has a contrib-id of type 'orcid', the attributes
    nameIdentifierScheme, schemeURI and nameIdentifier are added.
    nameIdentifier holds the bare ORCID iD: an optional http(s)://orcid.org/
    prefix is stripped, so https URLs and iDs given without any URL are kept
    (the previous substring-after on 'http://orcid.org/' returned an empty
    string for both of those forms).
-->
<xsl:template name="authors">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <xsl:element name="dc:creator">
            <xsl:if test="./*[local-name()='contrib-id'][@contrib-id-type='orcid']">
                <xsl:attribute name="nameIdentifierScheme">
                    <xsl:text>ORCID</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="schemeURI">
                    <xsl:text>http://orcid.org/</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="nameIdentifier">
                    <!-- only the first ORCID contrib-id is used; surrounding whitespace is dropped -->
                    <xsl:value-of select="replace(normalize-space((./*[local-name()='contrib-id'][@contrib-id-type='orcid'])[1]), '^https?://(www\.)?orcid\.org/', '', 'i')"/>
                </xsl:attribute>
            </xsl:if>
            <xsl:value-of select="concat(normalize-space(./*[local-name()='name']/*[local-name()='surname']), ', ', normalize-space(./*[local-name()='name']/*[local-name()='given-names']))"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
|
||||
|
||||
<!--
    Record header: shallow-copies the header element, processes its attributes
    and children through the matching templates, then appends a
    dr:dateOfTransformation element holding the value of $transDate
    (declared outside this template).
-->
<xsl:template match="//*[local-name() = 'header']">
    <xsl:copy>
        <xsl:apply-templates select="@*|node()"/>
        <xsl:element name="dr:dateOfTransformation">
            <xsl:value-of select="$transDate"/>
        </xsl:element>
    </xsl:copy>
</xsl:template>
|
||||
|
||||
|
||||
<!--
    Identity rule: copies any node or attribute as it is and recurses into
    its attributes and children.
-->
<xsl:template match="node()|@*">
    <xsl:copy>
        <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
</xsl:template>
|
||||
</xsl:stylesheet>
|
|
@ -0,0 +1,437 @@
|
|||
<!-- from production 2021-0614 -->
|
||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.1"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||
xmlns:transformExt="http://namespace.openaire.eu/java/org.apache.commons.codec.digest.DigestUtils"
|
||||
xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy"
|
||||
extension-element-prefixes="transformExt TransformationFunction"
|
||||
exclude-result-prefixes="transformExt TransformationFunction" >
|
||||
<xsl:output indent="yes" omit-xml-declaration="yes"/>
|
||||
|
||||
<xsl:param name="varHostedById" select="'opendoar____::908'"/>
|
||||
<xsl:param name="varHostedByName" select="'Europe PubMed Central'"/>
|
||||
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDsType" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'"/>
|
||||
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'"/>
|
||||
<xsl:param name="varFP7" select="'corda_______::'"/>
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'"/>
|
||||
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
|
||||
|
||||
<xsl:param name="index" select="0"/>
|
||||
<xsl:param name="transDate" select="current-dateTime()"/>
|
||||
<xsl:variable name="tf" select="TransformationFunction:getInstance()"/>
|
||||
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
|
||||
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
|
||||
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
|
||||
|
||||
<!-- terminate: aborts the transformation by emitting an xsl:message with terminate="yes". -->
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
<metadata>
|
||||
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
|
||||
<xsl:call-template name="terminate"/>
|
||||
</xsl:if>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:title'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="authors">
|
||||
<!--
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'contrib'][@contrib-type='author']"/>
|
||||
-->
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))][./*[local-name()='name'] or ./*[local-name()='name-alternatives']/*[local-name()='name']][string-length(.//*[local-name()='surname']) + string-length(.//*[local-name()='given-names']) > 0]"/>
|
||||
</xsl:call-template>
|
||||
<!-- <xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()='abstract']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:description'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group' and not(lower-case(@kwd-group-type)=('mesh', 'ocis'))]//*[local-name()='kwd']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='mesh' and ./*[local-name()='kwd']]">
|
||||
<xsl:for-each select="./*[local-name()='kwd']">
|
||||
<dc:subject>
|
||||
<xsl:attribute name="subjectScheme" select="'mesh'"/>
|
||||
<xsl:attribute name="schemeURI" select="'http://www.nlm.nih.gov/mesh/'"/>
|
||||
<xsl:attribute name="valueURI" select="''"/>
|
||||
<xsl:value-of select="./concat('mesh:', replace(., 'mesh (.*)$', '$1'))"/>
|
||||
</dc:subject>
|
||||
</xsl:for-each>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='ocis' and ./*[local-name()='kwd']]">
|
||||
<xsl:for-each select="./*[local-name()='kwd']">
|
||||
<dc:subject>
|
||||
<xsl:attribute name="subjectScheme" select="'ocis'"/>
|
||||
<xsl:attribute name="schemeURI" select="''"/>
|
||||
<xsl:attribute name="valueURI" select="''"/>
|
||||
<xsl:value-of select="./concat('ocis:', .)"/>
|
||||
</dc:subject>
|
||||
</xsl:for-each>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:source'"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/(*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version'], *[local-name() = 'article-version'])/concat('article-version (', @article-version-type, ') ', .)"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:source'"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:element name="dc:language">
|
||||
<xsl:text>eng</xsl:text>
|
||||
</xsl:element>
|
||||
<xsl:element name="dc:identifier">
|
||||
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:fulltext">
|
||||
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
|
||||
</xsl:element>
|
||||
<!--
    oaf:dateAccepted
    * An electronic publication date exists (legacy pub-type='epub', or
      date-type='pub' with publication-format='electronic'):
      $year-$month-01, or $year-01-01 when $month is not a number.
    * Otherwise the print year is used with '-01-01' appended. The print date
      is accepted in both markups (legacy pub-type='ppub', or date-type='pub'
      with publication-format='print'), mirroring the electronic branch; the
      legacy form wins when both are present.
    NOTE(review): the day is always written as '01'; confirm that this is
    intended.
-->
<xsl:element name="oaf:dateAccepted">
    <xsl:choose>
        <xsl:when test="//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub'] or //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']">
            <xsl:choose>
                <xsl:when test="string(number($month)) eq 'NaN'">
                    <xsl:value-of select="concat($year, '-', '01', '-', '01')"/>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:value-of select="concat($year, '-', $month, '-', '01')"/>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:when>
        <xsl:otherwise>
            <!-- first legacy ppub year, else first print-format year -->
            <xsl:variable name="varPrintYear" select="(//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='ppub']/*[local-name()='year'], //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='print']/*[local-name()='year'])[1]"/>
            <xsl:value-of select="concat($varPrintYear, '-01-01')"/>
        </xsl:otherwise>
    </xsl:choose>
</xsl:element>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='copyright-statement'])"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='license'])"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="identifiers">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<!--
    Project links: for every award-group whose institution-id ends with the
    FP7 (resp. H2020) FundRef DOI, emit one oaf:projectid per award-id that
    consists of exactly six digits. The emitted value is the prefix
    ($varFP7 / $varH2020) followed by the whitespace-normalised award-id, i.e.
    exactly the value that passed the six-digit check (previously the check
    and the emitted value could refer to different award-id elements).
-->
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '^\d{6}$')]">
    <oaf:projectid>
        <xsl:value-of select="concat($varFP7, normalize-space(.))"/>
    </oaf:projectid>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '^\d{6}$')]">
    <oaf:projectid>
        <xsl:value-of select="concat($varH2020, normalize-space(.))"/>
    </oaf:projectid>
</xsl:for-each>
|
||||
|
||||
<xsl:element name="oaf:accessrights">
|
||||
<xsl:text>OPEN</xsl:text>
|
||||
</xsl:element>
|
||||
|
||||
<xsl:element name="dr:CobjCategory">
|
||||
<xsl:attribute name="type" select="'publication'"/>
|
||||
<xsl:text>0001</xsl:text>
|
||||
</xsl:element>
|
||||
|
||||
<dc:type>
|
||||
<xsl:value-of select="//*[local-name() = 'article']/@article-type"/>
|
||||
</dc:type>
|
||||
|
||||
<!-- custom-meta perhaps not used for types, then drop
|
||||
<xsl:variable name='varTypLst' select="distinct-values((//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type))"/>
|
||||
<xsl:variable name='varTypLst' select="//*[local-name() = 'article']/@article-type"/>
|
||||
-->
|
||||
<!-- perhaps ensure that file indeed exists, e.g. as pdf etc -->
|
||||
<!--
|
||||
// reduce load for the big PubMed records by exchanging variables with choose
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in distinct-values((//*[local-name() = 'article']/@article-type, //oai:setSpec))
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
<xsl:variable name="varRefereedDescp" select="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]/'0001'"/>
|
||||
<xsl:variable name="varRefereedFnote" select="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = ('fn-group', 'notes')][
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*review\s*information.*') or
|
||||
matches(lower-case(.), '.*the\s*peer[\.\-_/\s\(\)]*review\s*history\s*for\s*this\s*article\s*is\s*available\s*at .*') or
|
||||
matches(lower-case(.), '.*provenance\s*and\s*peer[\.\-_/\s\(\)]*review.*') or
|
||||
matches(lower-case(.), '.*externally\s*peer[\.\-_/\s\(\)]*reviewed.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*reviewed\s*by.*') or
|
||||
matches(lower-case(.), '.*refereed\s*anonymously.*') or
|
||||
matches(lower-case(.), '.*peer\s*reviewer\s*reports\s*are\s*available.*')
|
||||
]/'0001'"/>
|
||||
<xsl:variable name="varRefereedReviw" select="//*[local-name() = ('article-meta', 'app', 'app-group')]/*[local-name() = 'supplementary-material']/*[local-name() = 'media'][
|
||||
matches(lower-case(.), '.*peer\s*review\s*file.*')]/'0001'"/>
|
||||
<xsl:variable name="varRefereedReltn" select="//*[local-name() = ('related-article')][./@related-article-type = ('peer-reviewed-article', 'reviewed-article')]/'0002'"/>
|
||||
<xsl:variable name="varRefereedCtRol" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']
|
||||
[./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or
|
||||
./*[local-name() = 'contrib'][./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or ./*[local-name() = 'role' and lower-case(.) = ('reviewer', 'solicited external reviewer')] or ./@contrib-type/lower-case(.) = 'reviewer']]/'0001'"/>
|
||||
<xsl:variable name="varRefereedVersn" select="//*[local-name() = 'article-meta'][./*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version' and . = 'preprint'] or ./*[local-name() = 'article-version' and . = 'preprint']]/'0002'"/>
|
||||
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedDescp, $varRefereedFnote, $varRefereedReviw, $varRefereedReltn, $varRefereedCtRol, $varRefereedVersn)"/>
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereed[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereed[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
-->
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in distinct-values((//*[local-name() = 'article']/@article-type, //oai:setSpec))
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereedConvt[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'article-id'][@pub-id-type='doi'][matches(., '^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = ('fn-group', 'notes')][
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*review\s*information.*') or
|
||||
matches(lower-case(.), '.*the\s*peer[\.\-_/\s\(\)]*review\s*history\s*for\s*this\s*article\s*is\s*available\s*at .*') or
|
||||
matches(lower-case(.), '.*provenance\s*and\s*peer[\.\-_/\s\(\)]*review.*') or
|
||||
matches(lower-case(.), '.*externally\s*peer[\.\-_/\s\(\)]*reviewed.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*reviewed\s*by.*') or
|
||||
matches(lower-case(.), '.*refereed\s*anonymously.*') or
|
||||
matches(lower-case(.), '.*peer\s*reviewer\s*reports\s*are\s*available.*') or
|
||||
matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\].*') or
|
||||
matches(lower-case(.), '.*\[.*referees\s*:.*\].*') or
|
||||
matches(lower-case(.), '^\s*plagiarism[\s\-\._]check.*') or
|
||||
matches(lower-case(.), '^\s*peer[\s\-\._]*review.*') or
|
||||
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)reviewer.*') or
|
||||
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)review\s*reports?.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = ('article-meta', 'app', 'app-group')]/*[local-name() = 'supplementary-material']/*[local-name() = 'media'][
|
||||
matches(lower-case(.), '.*peer\s*review\s*file.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']
|
||||
[./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or
|
||||
./*[local-name() = 'contrib'][./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or ./*[local-name() = 'role' and lower-case(.) = ('reviewer', 'solicited external reviewer')] or ./@contrib-type/lower-case(.) = 'reviewer']]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereedConvt[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = ('related-article')][./@related-article-type = ('peer-reviewed-article', 'reviewed-article')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta'][./*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version' and . = 'preprint'] or ./*[local-name() = 'article-version' and . = 'preprint']]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
|
||||
|
||||
<xsl:call-template name="journal">
|
||||
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']"/>
|
||||
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']"/>
|
||||
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']"/>
|
||||
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']"/>
|
||||
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']"/>
|
||||
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']"/>
|
||||
</xsl:call-template>
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varHostedByName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varHostedById"/>
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
|
||||
<xsl:for-each select="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = 'fn-group']/*[local-name() = 'fn'][matches(lower-case(.), 'country(/territory)? of origin:?\s*[A-Za-z\-]+')]">
|
||||
<oaf:country>
|
||||
<!--
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, replace(lower-case(.), '^(.|\s)*country(/territory)? of origin:?\s+([A-Za-z\-,\(\)]+(\s+[A-Za-z\-,\(\)]+)*)(.|\s)*$', '$3'), 'Countries')"/>
|
||||
-->
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(substring(substring-after(lower-case(.), 'of origin'), 2)), 'Countries')"/>
|
||||
</oaf:country>
|
||||
</xsl:for-each>
|
||||
|
||||
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
|
||||
</xsl:template>
|
||||
|
||||
<!--
    allElements: for every item of $sourceElement, emits one element named by
    $targetElement containing the whitespace-normalised string value of the
    item. Items may be nodes or atomic values (callers in this stylesheet pass
    both).
-->
<xsl:template name="allElements">
    <xsl:param name="sourceElement"/>
    <xsl:param name="targetElement"/>
    <xsl:for-each select="$sourceElement">
        <xsl:variable name="varText" select="normalize-space(.)"/>
        <xsl:element name="{$targetElement}">
            <xsl:value-of select="$varText"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!--
    journal: builds a single oaf:journal element.
    Each parameter is whitespace-normalised; issn, eissn, vol, issue, sp and ep
    become the attributes issn, eissn, vol, iss, sp and ep, and the journal
    title becomes the element text. Attributes are always written, even when
    the corresponding parameter is empty.
-->
<xsl:template name="journal">
    <xsl:param name="journalTitle"/>
    <xsl:param name="issn"/>
    <xsl:param name="eissn"/>
    <xsl:param name="vol"/>
    <xsl:param name="issue"/>
    <xsl:param name="sp"/>
    <xsl:param name="ep"/>
    <xsl:element name="oaf:journal">
        <xsl:attribute name="issn" select="normalize-space($issn)"/>
        <xsl:attribute name="eissn" select="normalize-space($eissn)"/>
        <xsl:attribute name="vol" select="normalize-space($vol)"/>
        <!-- note: the parameter is called 'issue', the attribute 'iss' -->
        <xsl:attribute name="iss" select="normalize-space($issue)"/>
        <xsl:attribute name="sp" select="normalize-space($sp)"/>
        <xsl:attribute name="ep" select="normalize-space($ep)"/>
        <xsl:value-of select="normalize-space($journalTitle)"/>
    </xsl:element>
</xsl:template>
|
||||
|
||||
|
||||
<!--
    identifiers: emits oaf:identifier elements for the DOI, PMC id and PubMed
    id found in $sourceElement (items with pub-id-type 'doi', 'pmcid' and
    'pmid'; identifierType 'doi', 'pmc' and 'pmid' respectively).
    An identifier is written only when a non-empty value exists, so records
    lacking one of the ids no longer get an empty oaf:identifier element.
    For each type the first matching item is used and its value is
    whitespace-normalised.
-->
<xsl:template name="identifiers">
    <xsl:param name="sourceElement"/>
    <xsl:variable name="varDoi" select="normalize-space($sourceElement[@pub-id-type='doi'][1])"/>
    <xsl:variable name="varPmc" select="normalize-space($sourceElement[@pub-id-type='pmcid'][1])"/>
    <xsl:variable name="varPmid" select="normalize-space($sourceElement[@pub-id-type='pmid'][1])"/>
    <xsl:if test="string-length($varDoi) gt 0">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>doi</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="$varDoi"/>
        </xsl:element>
    </xsl:if>
    <xsl:if test="string-length($varPmc) gt 0">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>pmc</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="$varPmc"/>
        </xsl:element>
    </xsl:if>
    <xsl:if test="string-length($varPmid) gt 0">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>pmid</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="$varPmid"/>
        </xsl:element>
    </xsl:if>
</xsl:template>
|
||||
|
||||
|
||||
<!--
    authors: emits one dc:creator per item of $sourceElement, with the text
    "surname, given-names". The name is read from a direct name child or from
    a name nested in name-alternatives.
    When the item has a contrib-id of type 'orcid', the attributes
    nameIdentifierScheme, schemeURI and nameIdentifier are added.
    nameIdentifier holds the bare ORCID iD: an optional http(s)://orcid.org/
    prefix is stripped, so https URLs and iDs given without any URL are kept
    (the previous substring-after on 'http://orcid.org/' returned an empty
    string for both of those forms).
-->
<xsl:template name="authors">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <xsl:element name="dc:creator">
            <xsl:if test="./*[local-name()='contrib-id'][@contrib-id-type='orcid']">
                <xsl:attribute name="nameIdentifierScheme">
                    <xsl:text>ORCID</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="schemeURI">
                    <xsl:text>http://orcid.org/</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="nameIdentifier">
                    <!-- only the first ORCID contrib-id is used; surrounding whitespace is dropped -->
                    <xsl:value-of select="replace(normalize-space((./*[local-name()='contrib-id'][@contrib-id-type='orcid'])[1]), '^https?://(www\.)?orcid\.org/', '', 'i')"/>
                </xsl:attribute>
            </xsl:if>
            <xsl:value-of select="concat(normalize-space(./(*[local-name()='name'], *[local-name()='name-alternatives']/*[local-name()='name'])/*[local-name()='surname']), ', ', normalize-space(./(*[local-name()='name'], *[local-name()='name-alternatives']/*[local-name()='name'])/*[local-name()='given-names']))"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
|
||||
|
||||
<!--
    Record header: shallow-copies the header element, processes its attributes
    and children through the matching templates, then appends a
    dr:dateOfTransformation element holding the value of the $transDate
    stylesheet parameter.
-->
<xsl:template match="//*[local-name() = 'header']">
    <xsl:copy>
        <xsl:apply-templates select="@*|node()"/>
        <xsl:element name="dr:dateOfTransformation">
            <xsl:value-of select="$transDate"/>
        </xsl:element>
    </xsl:copy>
</xsl:template>
|
||||
|
||||
|
||||
<!--
    Identity rule: copies any node or attribute as it is and recurses into
    its attributes and children.
-->
<xsl:template match="node()|@*">
    <xsl:copy>
        <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
</xsl:template>
|
||||
</xsl:stylesheet>
|
|
@ -0,0 +1,493 @@
|
|||
<!-- from PROD 2021-06-14 -->
|
||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
                xmlns:vocabulary="http://eu/dnetlib/transform/clean"
                xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
                xmlns:oaf="http://namespace.openaire.eu/oaf"
                xmlns:datacite="http://datacite.org/schema/kernel-4"
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:dr="http://www.driver-repository.eu/namespace/dr"
                xmlns:dc="http://purl.org/dc/elements/1.1/"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xmlns:xlink="http://www.w3.org/1999/xlink"
                exclude-result-prefixes="xsl vocabulary dateCleaner"
                version="2.0">
|
||||
|
||||
<!--
|
||||
<xsl:param name="varHostedById" select="'opendoar____::908'"/>
|
||||
<xsl:param name="varHostedByName" select="'Europe PubMed Central'"/>
|
||||
-->
|
||||
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDsType" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'"/>
|
||||
<xsl:param name="varFP7OtherDOI" select="'10.13039/100011102'"/>
|
||||
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'"/>
|
||||
<xsl:param name="varFP7" select="'corda_______::'"/>
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'"/>
|
||||
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
|
||||
|
||||
<xsl:param name="index" select="0"/>
|
||||
<xsl:param name="transDate" select="current-dateTime()"/>
|
||||
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
|
||||
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
|
||||
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
|
||||
|
||||
<!-- Named template "terminate": emits an xsl:message with terminate="yes", which aborts the transformation. The root template calls it when the record has no non-empty article-title. -->
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
<metadata>
|
||||
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
|
||||
<xsl:call-template name="terminate"/>
|
||||
</xsl:if>
|
||||
<!-- in journal.fi xml:lang of translated titles is not within the trans-title element but within the surrounding trans-title-group element (which just contains 1 trans-title element) -->
|
||||
<!--
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()=('article-title', 'trans-title-group')][string-length(normalize-space(.))> 0]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:title'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="title">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name()='title-group']//*[local-name()=('article-title', 'trans-title', 'subtitle', 'trans-subtitle')]"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="authors">
|
||||
<!--
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))]"/>
|
||||
-->
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group'][@content-type='author']/*[local-name() = 'contrib']"/>
|
||||
</xsl:call-template>
|
||||
<!-- <xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()=('abstract', 'trans-abstract')]"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:description'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group']//*[local-name()='kwd']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:source'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:element name="dc:language">
|
||||
<xsl:value-of select="//*[local-name()='metadata']//*[local-name()='article']/@xml:lang" />
|
||||
</xsl:element>
|
||||
<xsl:element name="dc:identifier">
|
||||
<xsl:value-of select="//*[local-name()='article-meta']/*[local-name()='self-uri'][contains(./@xlink:href, '/view/')]/@xlink:href" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:dateAccepted">
|
||||
<!--
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])" />
|
||||
|
||||
<xsl:value-of select="TransformationFunction:Convert($tf, //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub'], 'DateISO8601', 'yyyy-MM-dd', 'min()')" />
|
||||
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/replace(concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day']), '-(\d)([-$])', '-0$1$2')" />
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
|
||||
concat(./*[local-name()='year'], '-',
|
||||
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']), 2), '-',
|
||||
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']), 2))" />
|
||||
-->
|
||||
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
|
||||
concat(./*[local-name()='year'], '-',
|
||||
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']) idiv 2 + 1, 2), '-',
|
||||
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']) idiv 2 +1, 2))" />
|
||||
|
||||
</xsl:element>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<xsl:for-each select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub']">
|
||||
<xsl:choose>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2] and ./*[local-name()='day' and string-length(normalize-space(.)) = 2]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'])"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4]">
|
||||
<dc:date>
|
||||
<xsl:value-of select="./*[local-name()='year']"/>
|
||||
</dc:date>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()='meta-value'], //*[local-name()='permissions']/*[local-name()='copyright-statement']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='permissions']/*[local-name()='license']/@xlink:href"/>
|
||||
<xsl:with-param name="targetElement" select="'oaf:license'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'"/>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="identifiers">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']"/>
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri'][not(./@content-type = 'application/pdf')]/@xlink:href">
|
||||
<oaf:identifier>
|
||||
<xsl:attribute name="identifierType">
|
||||
<xsl:text>landingPage</xsl:text>
|
||||
</xsl:attribute>
|
||||
<xsl:value-of select="."/>
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri' and ./@content-type='application/pdf' and //oaf:datasourceprefix = ('ambientesust', 'qualityinedu')]/@xlink:href/replace(., '/view/', '/download/')">
|
||||
<oaf:fulltext>
|
||||
<xsl:value-of select="."/>
|
||||
</oaf:fulltext>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI) or ends-with(., $varFP7OtherDOI)]]">
|
||||
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varFP7, ./*[local-name()='award-id'])"/>
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]">
|
||||
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varH2020, ./*[local-name()='award-id'])"/>
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
|
||||
<!-- -->
|
||||
<xsl:variable name='varRights' select="distinct-values((for $i in (
|
||||
//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href,
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
|
||||
and not( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
|
||||
and not( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())])]/'open',
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
|
||||
and (( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
|
||||
or ( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())]))]/'embargo')
|
||||
return vocabulary:clean( normalize-space($i), 'dnet:access_modes') "
|
||||
/>
|
||||
|
||||
<!--
|
||||
and not((xs:date( max( (start_date, '0001-01-01') ) ) gt current-date()))
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read' and and not((xs:date( max( (./@start_date, '0001-01-01') ) ) gt current-date()))]/'open'
|
||||
-->
|
||||
|
||||
<oaf:accessrights>
|
||||
<xsl:choose>
|
||||
<xsl:when test="$varRights[. = 'EMBARGO']">
|
||||
<xsl:value-of select="'EMBARGO'"/>
|
||||
</xsl:when>
|
||||
<xsl:when test="$varRights[. != 'UNKNOWN']">
|
||||
<xsl:value-of select="$varRights[. != 'UNKNOWN'][1]"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="$varRights[1]"/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:accessrights>
|
||||
|
||||
<!--
|
||||
<oaf:accessrights>
|
||||
<xsl:value-of select="$varRights[1]"/>
|
||||
</oaf:accessrights>
|
||||
|
||||
<xsl:element name="oaf:accessrights">
|
||||
<xsl:value-of select="(//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href)/TransformationFunction:convertString($tf, ., 'AccessRights')" />
|
||||
</xsl:element>
|
||||
-->
|
||||
|
||||
<!--
|
||||
<xsl:element name="dr:CobjCategory">
|
||||
<xsl:variable name='varCobjCategory' select="TransformationFunction:convertString($tf, //*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()='meta-value'], 'TextTypologies')" />
|
||||
<xsl:variable name='varSuperType' select="TransformationFunction:convertString($tf, $varCobjCategory, 'SuperTypes')" />
|
||||
<xsl:attribute name="type" select="$varSuperType"/>
|
||||
<xsl:value-of select="$varCobjCategory" />
|
||||
</xsl:element>
|
||||
|
||||
<xsl:variable name='varCobjCatLst' select="for $i in (
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')" />
|
||||
-->
|
||||
|
||||
<xsl:variable name='varTypLst' select="distinct-values((//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type))"/>
|
||||
<!--
    Distinct cleaned values of the raw type strings in $varTypLst, looked up
    in the 'dnet:publication_resource' vocabulary. The vocabulary name
    previously carried a doubled 'dnet:dnet:' prefix, unlike every other
    vocabulary referenced in this stylesheet.
-->
<xsl:variable name="varCobjCatLst"
              select="distinct-values((for $i in $varTypLst
                                       return vocabulary:clean( normalize-space($i), 'dnet:publication_resource')))" />
|
||||
<xsl:variable name='varCobjSupLst' select="for $i in $varCobjCatLst
|
||||
return concat($i, '###', vocabulary:clean( normalize-space($i), 'dnet:result_typologies'))" />
|
||||
<dr:CobjCategory>
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other')]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other')][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0000'))]) > 0">
|
||||
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0000'))][1]" />
|
||||
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
|
||||
<xsl:value-of select="substring-before($varCobjSup, '###')" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:attribute name="type" select="'other'"/>
|
||||
<xsl:value-of select="'0000'" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</dr:CobjCategory>
|
||||
|
||||
<!--
|
||||
<xsl:for-each select="$varCobjSupLst">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
-->
|
||||
|
||||
<xsl:for-each select="$varTypLst">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
|
||||
<!--
|
||||
<xsl:for-each select="(//*[local-name()='article']/@article-type, //*[local-name() = 'custom-meta' and ./@specific-use = 'resource-type']/*[local-name() = ('meta-value', 'meta-name')])">
|
||||
<dc:type>
|
||||
<xsl:value-of select="."/>
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
-->
|
||||
|
||||
<oaf:language>
|
||||
<xsl:value-of select="vocabulary:clean( //*[local-name()='metadata']//*[local-name()='article']/@xml:lang, 'dnet:languages')" />
|
||||
</oaf:language>
|
||||
|
||||
<!-- review status -->
|
||||
<!-- ToDo:
|
||||
review status
|
||||
~ ask Journal.fi to put it elsewhere
|
||||
~ evaluate article-version (no example found yet)
|
||||
subject/kwd:
|
||||
~ handle thesauri (no example found yet)
|
||||
relations:
|
||||
~ handle fn (no example found yet)
|
||||
-->
|
||||
<!--
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in (
|
||||
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
|
||||
//*[local-name() = 'article']/@article-type)
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
|
||||
-->
|
||||
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in ($varTypLst)
|
||||
return vocabulary:clean( normalize-space($i), 'dnet:review_levels')"/>
|
||||
<xsl:variable name="varRefereedDescp" select="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]/'0001'"/>
|
||||
<xsl:variable name="varRefereedSubjt" select="//*[local-name() = 'article-categories' and contains(//dri:recordIdentifier, 'oai:journal.fi')]/*[local-name() = 'subj-group' and ./@subj-group-type='heading']/*[local-name() = 'subject' and . = 'Peer reviewed articles']/'0001'"/>
|
||||
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedDescp, $varRefereedSubjt)"/>
|
||||
<!--
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="$varRefereedDescp"/>
|
||||
</oaf:refereed>
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="$varRefereed"/>
|
||||
</oaf:refereed>
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="count($varRefereed[. = '0001']) > 0"/>
|
||||
</oaf:refereed>
|
||||
-->
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereed[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereed[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'"/>
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
|
||||
<xsl:call-template name="journal">
|
||||
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
|
||||
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']"/>
|
||||
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']"/>
|
||||
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']"/>
|
||||
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']"/>
|
||||
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']"/>
|
||||
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']"/>
|
||||
</xsl:call-template>
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName"/>
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId"/>
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
|
||||
</xsl:template>
|
||||
|
||||
<!--
    Named template "allElements".
    For every node in $sourceElement, creates an element named by
    $targetElement whose text is the whitespace-normalised string value of
    the node. For dc:title, dc:description and dc:subject targets an
    xml:lang attribute is added when the node or its parent has one; the
    node's own attribute takes precedence.
-->
<xsl:template name="allElements">
    <xsl:param name="sourceElement"/>
    <xsl:param name="targetElement"/>
    <xsl:for-each select="$sourceElement">
        <!-- own xml:lang first, parent's xml:lang as fallback -->
        <xsl:variable name="lang" select="(./@xml:lang, ../@xml:lang)[1]"/>
        <xsl:element name="{$targetElement}">
            <xsl:if test="$targetElement = ('dc:title', 'dc:description', 'dc:subject') and exists($lang)">
                <xsl:attribute name="xml:lang" select="$lang"/>
            </xsl:if>
            <xsl:value-of select="normalize-space(.)"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!--
    Named template "title".
    Emits one dc:title per node in $sourceElement. The text is the node's
    normalised value followed by the normalised values of any following
    sibling subtitle / trans-subtitle elements, joined with ': '. An
    xml:lang attribute is taken from the node or, failing that, its parent.
-->
<xsl:template name="title">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <xsl:variable name="lang" select="(./@xml:lang, ../@xml:lang)[1]"/>
        <xsl:variable name="parts"
                      select="(., ./following-sibling::*[local-name() = ('subtitle', 'trans-subtitle')])/normalize-space(.)"/>
        <xsl:element name="dc:title">
            <xsl:if test="exists($lang)">
                <xsl:attribute name="xml:lang" select="$lang"/>
            </xsl:if>
            <xsl:value-of select="string-join($parts, ': ')"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
<!--
    Named template "journal".
    Builds a single oaf:journal element. The attributes issn, eissn, vol,
    iss, sp and ep receive the whitespace-normalised values of the
    corresponding parameters; the element text is the normalised
    $journalTitle.
-->
<xsl:template name="journal">
    <xsl:param name="journalTitle"/>
    <xsl:param name="issn"/>
    <xsl:param name="eissn"/>
    <xsl:param name="vol"/>
    <xsl:param name="issue"/>
    <xsl:param name="sp"/>
    <xsl:param name="ep"/>
    <xsl:element name="oaf:journal">
        <xsl:attribute name="issn" select="normalize-space($issn)"/>
        <xsl:attribute name="eissn" select="normalize-space($eissn)"/>
        <xsl:attribute name="vol" select="normalize-space($vol)"/>
        <!-- note: the parameter is $issue, the output attribute is "iss" -->
        <xsl:attribute name="iss" select="normalize-space($issue)"/>
        <xsl:attribute name="sp" select="normalize-space($sp)"/>
        <xsl:attribute name="ep" select="normalize-space($ep)"/>
        <xsl:value-of select="normalize-space($journalTitle)"/>
    </xsl:element>
</xsl:template>
|
||||
|
||||
|
||||
<!--
    Named template "identifiers".
    When $sourceElement contains an entry with pub-id-type 'doi' whose
    string value is not empty, emits an oaf:identifier element with
    identifierType="doi" holding that value.
-->
<xsl:template name="identifiers">
    <xsl:param name="sourceElement"/>
    <xsl:variable name="doi" select="$sourceElement[@pub-id-type='doi']"/>
    <xsl:if test="string-length($doi) gt 0">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType" select="'doi'"/>
            <xsl:value-of select="$doi"/>
        </xsl:element>
    </xsl:if>
</xsl:template>
|
||||
|
||||
|
||||
<!--
    Named template "authors".
    Emits one dc:creator element for every node passed in $sourceElement.
    * If the node carries a contrib-id of type 'orcid', the attributes
      nameIdentifierScheme, schemeURI and nameIdentifier are added. The
      identifier is the contrib-id value with a leading http://orcid.org/
      or https://orcid.org/ removed (a bare iD is kept as it is).
    * The element text is "surname, given-names", read from the first
      name child of the node.
-->
<xsl:template name="authors">
    <xsl:param name="sourceElement"/>
    <xsl:for-each select="$sourceElement">
        <!-- [1] keeps normalize-space() from failing on a multi-item sequence -->
        <xsl:variable name="orcid"
                      select="normalize-space((./*[local-name()='contrib-id'][@contrib-id-type='orcid'])[1])"/>
        <xsl:variable name="personName" select="(./*[local-name()='name'])[1]"/>
        <xsl:element name="dc:creator">
            <xsl:if test="./*[local-name()='contrib-id'][@contrib-id-type='orcid']">
                <xsl:attribute name="nameIdentifierScheme">
                    <xsl:text>ORCID</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="schemeURI">
                    <xsl:text>http://orcid.org/</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="nameIdentifier">
                    <!-- ORCID URIs are published with either scheme; strip both -->
                    <xsl:value-of select="replace($orcid, '^https?://orcid\.org/', '')"/>
                </xsl:attribute>
            </xsl:if>
            <xsl:value-of select="concat(normalize-space($personName/*[local-name()='surname'][1]), ', ', normalize-space($personName/*[local-name()='given-names'][1]))"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
|
||||
|
||||
|
||||
<!--
    Copies every element whose local name is 'header', processes its
    attributes and children through the other templates, and appends a
    dr:dateOfTransformation child holding the value of $transDate.
-->
<xsl:template match="//*[local-name() = 'header']">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()"/>
        <xsl:element name="dr:dateOfTransformation">
            <xsl:value-of select="$transDate"/>
        </xsl:element>
    </xsl:copy>
</xsl:template>
|
||||
|
||||
|
||||
<!-- Identity rule: copies any node or attribute and recurses into its attributes and children. -->
<xsl:template match="@* | node()">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
</xsl:template>
|
||||
</xsl:stylesheet>
|
|
@ -0,0 +1,373 @@
|
|||
<!-- for adaptation , 2021-06-14 PROD -->
|
||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
|
||||
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
|
||||
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-4"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
exclude-result-prefixes="xsl vocabulary dateCleaner"
|
||||
version="2.0">
|
||||
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDsType" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:output indent="yes" omit-xml-declaration="yes" />
|
||||
<xsl:param name="varHostedById" select="'opendoar____::908'" />
|
||||
<xsl:param name="varHostedByName" select="'Europe PubMed Central'" />
|
||||
|
||||
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'" />
|
||||
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'" />
|
||||
<xsl:param name="varFP7" select="'corda_______::'" />
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'" />
|
||||
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
|
||||
<xsl:param name="index" select="0" />
|
||||
<xsl:param name="transDate" select="current-dateTime()" />
|
||||
<!--
    Year / month / day of the electronic publication date, zero-padded. The date is looked up
    either as pub-date[@pub-type='epub'] or as pub-date[@date-type='pub' and
    @publication-format='electronic'].
    The trailing [1] matters: a record may carry both flavours, and format-number() accepts at
    most one item, so the bare union raised a type error. When no value is found format-number()
    yields 'NaN', which the dateAccepted logic tests for.
-->
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year'])[1], '0000')" />
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month'])[1], '00')" />
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day'])[1], '00')" />
|
||||
<!-- terminate: aborts the whole transformation (xsl:message terminate="yes") with a fixed
     message; called by the root template when a record has no usable article title. -->
<xsl:template name="terminate">
    <xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
<metadata>
|
||||
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
|
||||
<xsl:call-template name="terminate" />
|
||||
</xsl:if>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0]" />
|
||||
<xsl:with-param name="targetElement" select="'dc:title'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="authors">
|
||||
<!--
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'contrib'][@contrib-type='author']"/>
|
||||
-->
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))][./*[local-name()='name'] or ./*[local-name()='name-alternatives']/*[local-name()='name']][string-length(.//*[local-name()='surname']) + string-length(.//*[local-name()='given-names']) > 0]" />
|
||||
</xsl:call-template> <!-- <xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
|
||||
</xsl:call-template>
|
||||
-->
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()='abstract']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:description'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group' and not(lower-case(@kwd-group-type)=('mesh', 'ocis'))]//*[local-name()='kwd']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'" />
|
||||
</xsl:call-template>
|
||||
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='mesh' and ./*[local-name()='kwd']]">
|
||||
<xsl:for-each select="./*[local-name()='kwd']">
|
||||
<dc:subject>
|
||||
<xsl:attribute name="subjectScheme" select="'mesh'" />
|
||||
<xsl:attribute name="schemeURI" select="'http://www.nlm.nih.gov/mesh/'" />
|
||||
<xsl:attribute name="valueURI" select="''" />
|
||||
<xsl:value-of select="./concat('mesh:', replace(., 'mesh (.*)$', '$1'))" />
|
||||
</dc:subject>
|
||||
</xsl:for-each>
|
||||
</xsl:for-each>
|
||||
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='ocis' and ./*[local-name()='kwd']]">
|
||||
<xsl:for-each select="./*[local-name()='kwd']">
|
||||
<dc:subject>
|
||||
<xsl:attribute name="subjectScheme" select="'ocis'" />
|
||||
<xsl:attribute name="schemeURI" select="''" />
|
||||
<xsl:attribute name="valueURI" select="''" />
|
||||
<xsl:value-of select="./concat('ocis:', .)" />
|
||||
</dc:subject>
|
||||
</xsl:for-each>
|
||||
</xsl:for-each>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:source'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/(*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version'], *[local-name() = 'article-version'])/concat('article-version (', @article-version-type, ') ', .)" />
|
||||
<xsl:with-param name="targetElement" select="'dc:source'" />
|
||||
</xsl:call-template>
|
||||
<xsl:element name="dc:language">
|
||||
<xsl:text>eng</xsl:text>
|
||||
</xsl:element>
|
||||
<xsl:element name="dc:identifier">
|
||||
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:fulltext">
|
||||
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
|
||||
</xsl:element>
|
||||
<xsl:element name="oaf:dateAccepted">
|
||||
<xsl:choose>
|
||||
<xsl:when test="//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub'] or //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']">
|
||||
<xsl:if test="string(number($month)) eq 'NaN'">
|
||||
<xsl:value-of select="concat($year, '-', '01', '-', '01')" />
|
||||
</xsl:if>
|
||||
<xsl:if test="string(number($month)) != 'NaN'">
|
||||
<xsl:value-of select="concat($year, '-', $month, '-', '01')" />
|
||||
</xsl:if>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="concat(//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='ppub']/*[local-name()='year'], '-01-01')" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:element>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='copyright-statement'])" />
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='license'])" />
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']" />
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="identifiers">
|
||||
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']" />
|
||||
</xsl:call-template>
|
||||
<!--
    Project links derived from funding statements.
    An award-group is attributed to FP7 or H2020 when one of its institution-id values ends with
    the corresponding FundRef DOI; every award-id made of exactly six digits becomes an
    oaf:projectid prefixed with the funder namespace.
    Each award-id is handled on its own and normalised before use: concat() over several
    award-id children is a type error, and the un-normalised value could carry the whitespace
    that the six-digit test deliberately ignores.
-->
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
    <oaf:projectid>
        <xsl:value-of select="concat($varFP7, normalize-space(.))" />
    </oaf:projectid>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]/*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
    <oaf:projectid>
        <xsl:value-of select="concat($varH2020, normalize-space(.))" />
    </oaf:projectid>
</xsl:for-each>
|
||||
<xsl:element name="oaf:accessrights">
|
||||
<xsl:text>OPEN</xsl:text>
|
||||
</xsl:element>
|
||||
<xsl:element name="dr:CobjCategory">
|
||||
<xsl:attribute name="type" select="'publication'" />
|
||||
<xsl:text>0001</xsl:text>
|
||||
</xsl:element>
|
||||
<dc:type>
|
||||
<xsl:value-of select="//*[local-name() = 'article']/@article-type" />
|
||||
</dc:type>
|
||||
|
||||
|
||||
<xsl:variable name="varRefereedConvt" select="for $i in (//*[local-name() = 'resource']/*[local-name() = ('resourceType', 'version')]/(., @uri))
|
||||
return vocabulary:clean( normalize-space($i), 'dnet:review_levels')"/>
|
||||
|
||||
<!-- <xsl:variable name="varRefereedConvt" select="for $i in distinct-values((//*[local-name() = 'article']/@article-type, //oai:setSpec))
|
||||
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')" />
|
||||
-->
|
||||
<xsl:choose>
|
||||
<xsl:when test="count($varRefereedConvt[. = '0001']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'article-id'][@pub-id-type='doi'][matches(., '^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = ('fn-group', 'notes')][
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*review\s*information.*') or
|
||||
matches(lower-case(.), '.*the\s*peer[\.\-_/\s\(\)]*review\s*history\s*for\s*this\s*article\s*is\s*available\s*at .*') or
|
||||
matches(lower-case(.), '.*provenance\s*and\s*peer[\.\-_/\s\(\)]*review.*') or
|
||||
matches(lower-case(.), '.*externally\s*peer[\.\-_/\s\(\)]*reviewed.*') or
|
||||
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*reviewed\s*by.*') or
|
||||
matches(lower-case(.), '.*refereed\s*anonymously.*') or
|
||||
matches(lower-case(.), '.*peer\s*reviewer\s*reports\s*are\s*available.*') or
|
||||
matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\].*') or
|
||||
matches(lower-case(.), '.*\[.*referees\s*:.*\].*') or
|
||||
matches(lower-case(.), '^\s*plagiarism[\s\-\._]check.*') or
|
||||
matches(lower-case(.), '^\s*peer[\s\-\._]*review.*') or
|
||||
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)reviewer.*') or
|
||||
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)review\s*reports?.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = ('article-meta', 'app', 'app-group')]/*[local-name() = 'supplementary-material']/*[local-name() = 'media'][
|
||||
matches(lower-case(.), '.*peer\s*review\s*file.*')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']
|
||||
[./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or
|
||||
./*[local-name() = 'contrib'][./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or ./*[local-name() = 'role' and lower-case(.) = ('reviewer', 'solicited external reviewer')] or ./@contrib-type/lower-case(.) = 'reviewer']]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0001'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="count($varRefereedConvt[. = '0002']) > 0">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = ('related-article')][./@related-article-type = ('peer-reviewed-article', 'reviewed-article')]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
<xsl:when test="//*[local-name() = 'article-meta'][./*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version' and . = 'preprint'] or ./*[local-name() = 'article-version' and . = 'preprint']]">
|
||||
<oaf:refereed>
|
||||
<xsl:value-of select="'0002'" />
|
||||
</oaf:refereed>
|
||||
</xsl:when>
|
||||
</xsl:choose>
|
||||
<xsl:call-template name="journal">
|
||||
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']" />
|
||||
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']" />
|
||||
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']" />
|
||||
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']" />
|
||||
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']" />
|
||||
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']" />
|
||||
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']" />
|
||||
</xsl:call-template>
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varHostedByName" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varHostedById" />
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId" />
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
<xsl:for-each select="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = 'fn-group']/*[local-name() = 'fn'][matches(lower-case(.), 'country(/territory)? of origin:?\s*[A-Za-z\-]+')]">
|
||||
<oaf:country>
|
||||
<!--
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, replace(lower-case(.), '^(.|\s)*country(/territory)? of origin:?\s+([A-Za-z\-,\(\)]+(\s+[A-Za-z\-,\(\)]+)*)(.|\s)*$', '$3'), 'Countries')"/>
|
||||
-->
|
||||
<!-- ACz, 2021-06-14
|
||||
<xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(substring(substring-after(lower-case(.), 'of origin'), 2)), 'Countries')" />
|
||||
-->
|
||||
<xsl:value-of select="vocabulary:clean( normalize-space(substring(substring-after(lower-case(.), 'of origin'), 2)), 'dnet:countries')"/>
|
||||
</oaf:country>
|
||||
</xsl:for-each>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
</xsl:template>
|
||||
<!--
    allElements: for every item of $sourceElement creates an element whose name is given by
    $targetElement (e.g. 'dc:title') and whose content is the whitespace-normalised string
    value of the item.
-->
<xsl:template name="allElements">
    <xsl:param name="sourceElement" />
    <xsl:param name="targetElement" />
    <xsl:for-each select="$sourceElement">
        <xsl:variable name="text" select="normalize-space(.)" />
        <xsl:element name="{$targetElement}">
            <xsl:value-of select="$text" />
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
<!--
    journal: emits oaf:journal with the journal title as content and the attributes
    issn, eissn, vol, iss, sp (start page) and ep (end page), all whitespace-normalised.
    Every parameter is reduced to its first item before normalize-space() is applied:
    the callers pass node selections, and normalize-space() raises a type error when it
    receives more than one item (e.g. a journal declaring two print ISSNs).
    A missing value still yields an empty attribute / empty content, as before.
-->
<xsl:template name="journal">
    <xsl:param name="journalTitle" />
    <xsl:param name="issn" />
    <xsl:param name="eissn" />
    <xsl:param name="vol" />
    <xsl:param name="issue" />
    <xsl:param name="sp" />
    <xsl:param name="ep" />
    <xsl:element name="oaf:journal">
        <xsl:attribute name="issn">
            <xsl:value-of select="normalize-space($issn[1])" />
        </xsl:attribute>
        <xsl:attribute name="eissn">
            <xsl:value-of select="normalize-space($eissn[1])" />
        </xsl:attribute>
        <xsl:attribute name="vol">
            <xsl:value-of select="normalize-space($vol[1])" />
        </xsl:attribute>
        <xsl:attribute name="iss">
            <xsl:value-of select="normalize-space($issue[1])" />
        </xsl:attribute>
        <xsl:attribute name="sp">
            <xsl:value-of select="normalize-space($sp[1])" />
        </xsl:attribute>
        <xsl:attribute name="ep">
            <xsl:value-of select="normalize-space($ep[1])" />
        </xsl:attribute>
        <xsl:value-of select="normalize-space($journalTitle[1])" />
    </xsl:element>
</xsl:template>
|
||||
<!--
    identifiers: turns the elements of $sourceElement into oaf:identifier elements, mapping
    @pub-id-type 'doi' -> identifierType 'doi', 'pmcid' -> 'pmc' and 'pmid' -> 'pmid'.
    Only non-blank values are emitted, one element per value; previously an empty
    oaf:identifier was written for every identifier type the record did not provide.
-->
<xsl:template name="identifiers">
    <xsl:param name="sourceElement" />
    <xsl:for-each select="$sourceElement[@pub-id-type='doi'][string-length(normalize-space(.)) gt 0]">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>doi</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="." />
        </xsl:element>
    </xsl:for-each>
    <xsl:for-each select="$sourceElement[@pub-id-type='pmcid'][string-length(normalize-space(.)) gt 0]">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>pmc</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="." />
        </xsl:element>
    </xsl:for-each>
    <xsl:for-each select="$sourceElement[@pub-id-type='pmid'][string-length(normalize-space(.)) gt 0]">
        <xsl:element name="oaf:identifier">
            <xsl:attribute name="identifierType">
                <xsl:text>pmid</xsl:text>
            </xsl:attribute>
            <xsl:value-of select="." />
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
<!--
    authors: emits one dc:creator ("surname, given-names") per element of $sourceElement.
    The name is read from the contributor's own name element or, failing that, from the first
    name inside name-alternatives. When a contrib-id of type 'orcid' is present, the ORCID is
    attached through the nameIdentifierScheme / schemeURI / nameIdentifier attributes.
-->
<xsl:template name="authors">
    <xsl:param name="sourceElement" />
    <xsl:for-each select="$sourceElement">
        <!-- Single-item selections: normalize-space() and replace() raise a type error when they
             receive more than one item, which happens when name-alternatives lists several names
             or a contributor has several ORCID entries. -->
        <xsl:variable name="orcid" select="(./*[local-name()='contrib-id'][@contrib-id-type='orcid'])[1]" />
        <xsl:variable name="name" select="(./*[local-name()='name'], ./*[local-name()='name-alternatives']/*[local-name()='name'])[1]" />
        <xsl:element name="dc:creator">
            <xsl:if test="$orcid">
                <xsl:attribute name="nameIdentifierScheme">
                    <xsl:text>ORCID</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="schemeURI">
                    <xsl:text>http://orcid.org/</xsl:text>
                </xsl:attribute>
                <xsl:attribute name="nameIdentifier">
                    <!-- Strip the resolver prefix whether it is http or https; a bare ORCID is kept as is.
                         substring-after(., 'http://orcid.org/') returned an empty identifier in both of
                         those cases. -->
                    <xsl:value-of select="replace(normalize-space($orcid), '^https?://orcid\.org/', '')" />
                </xsl:attribute>
            </xsl:if>
            <xsl:value-of select="concat(normalize-space($name/*[local-name()='surname'][1]), ', ', normalize-space($name/*[local-name()='given-names'][1]))" />
        </xsl:element>
    </xsl:for-each>
</xsl:template>
|
||||
<!-- Copies the record header (attributes first, then children) and appends
     dr:dateOfTransformation carrying the value of $transDate. -->
<xsl:template match="//*[local-name() = 'header']">
    <xsl:copy>
        <xsl:apply-templates select="@*" />
        <xsl:apply-templates select="node()" />
        <xsl:element name="dr:dateOfTransformation">
            <xsl:value-of select="$transDate" />
        </xsl:element>
    </xsl:copy>
</xsl:template>
|
||||
<!-- Identity rule: any node or attribute not handled by a more specific template is copied
     unchanged, recursing into its attributes and then its children. -->
<xsl:template match="node()|@*">
    <xsl:copy>
        <xsl:apply-templates select="@*" />
        <xsl:apply-templates select="node()" />
    </xsl:copy>
</xsl:template>
|
||||
</xsl:stylesheet>
|
|
@ -35,7 +35,7 @@
|
|||
<configuration>
|
||||
<args>
|
||||
<arg>-Xmax-classfile-name</arg>
|
||||
<arg>140</arg>
|
||||
<arg>200</arg>
|
||||
</args>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
|
|
|
@ -3,8 +3,12 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
|||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
@ -98,14 +102,9 @@ public class MergeClaimsApplication {
|
|||
raw
|
||||
.joinWith(claim, raw.col("_1").equalTo(claim.col("_1")), "full_outer")
|
||||
.map(
|
||||
(MapFunction<Tuple2<Tuple2<String, T>, Tuple2<String, T>>, T>) value -> {
|
||||
Optional<Tuple2<String, T>> opRaw = Optional.ofNullable(value._1());
|
||||
Optional<Tuple2<String, T>> opClaim = Optional.ofNullable(value._2());
|
||||
|
||||
return opRaw.isPresent()
|
||||
? opRaw.get()._2()
|
||||
: opClaim.isPresent() ? opClaim.get()._2() : null;
|
||||
},
|
||||
(MapFunction<Tuple2<Tuple2<String, T>, Tuple2<String, T>>, T>) value -> processClaims(
|
||||
Optional.ofNullable(value._1()),
|
||||
Optional.ofNullable(value._2())),
|
||||
Encoders.bean(clazz))
|
||||
.filter(Objects::nonNull)
|
||||
.map(
|
||||
|
@ -117,6 +116,78 @@ public class MergeClaimsApplication {
|
|||
.text(outPath);
|
||||
}
|
||||
|
||||
private static <T extends Oaf> T processClaims(Optional<Tuple2<String, T>> opRaw,
|
||||
Optional<Tuple2<String, T>> opClaim) {
|
||||
|
||||
// when both are present
|
||||
if (opClaim.isPresent() && opRaw.isPresent()) {
|
||||
T oafClaim = opClaim.get()._2();
|
||||
if (oafClaim instanceof Result) {
|
||||
T oafRaw = opRaw.get()._2();
|
||||
|
||||
// merge the context lists from both oaf objects ...
|
||||
final List<Context> context = mergeContexts((Result) oafClaim, (Result) oafRaw);
|
||||
|
||||
// ... and set it on the result from the aggregator
|
||||
((Result) oafRaw).setContext(context);
|
||||
return oafRaw;
|
||||
}
|
||||
}
|
||||
|
||||
// otherwise prefer the result from the aggregator
|
||||
return opRaw.isPresent()
|
||||
? opRaw.get()._2()
|
||||
: opClaim.map(Tuple2::_2).orElse(null);
|
||||
}
|
||||
|
||||
private static List<Context> mergeContexts(Result oafClaim, Result oafRaw) {
|
||||
return new ArrayList<>(
|
||||
Stream
|
||||
.concat(
|
||||
Optional
|
||||
.ofNullable(oafClaim.getContext())
|
||||
.map(List::stream)
|
||||
.orElse(Stream.empty()),
|
||||
Optional
|
||||
.ofNullable(oafRaw.getContext())
|
||||
.map(List::stream)
|
||||
.orElse(Stream.empty()))
|
||||
.collect(
|
||||
Collectors
|
||||
.toMap(
|
||||
Context::getId,
|
||||
c -> c,
|
||||
(c1, c2) -> {
|
||||
Context c = new Context();
|
||||
c.setId(c1.getId());
|
||||
c
|
||||
.setDataInfo(
|
||||
new ArrayList<>(
|
||||
Stream
|
||||
.concat(
|
||||
Optional
|
||||
.ofNullable(c1.getDataInfo())
|
||||
.map(List::stream)
|
||||
.orElse(Stream.empty()),
|
||||
Optional
|
||||
.ofNullable(c2.getDataInfo())
|
||||
.map(List::stream)
|
||||
.orElse(Stream.empty()))
|
||||
.collect(
|
||||
Collectors
|
||||
.toMap(
|
||||
d -> Optional
|
||||
.ofNullable(d.getProvenanceaction())
|
||||
.map(Qualifier::getClassid)
|
||||
.orElse(""),
|
||||
d -> d,
|
||||
(d1, d2) -> d1))
|
||||
.values()));
|
||||
return c;
|
||||
}))
|
||||
.values());
|
||||
}
|
||||
|
||||
private static <T extends Oaf> Dataset<T> readFromPath(
|
||||
SparkSession spark, String path, Class<T> clazz) {
|
||||
return spark
|
||||
|
|
|
@ -480,38 +480,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
|
||||
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
|
||||
|
||||
final Relation r1 = new Relation();
|
||||
final Relation r2 = new Relation();
|
||||
|
||||
if (StringUtils.isNotBlank(validationDate)) {
|
||||
r1.setValidated(true);
|
||||
r1.setValidationDate(validationDate);
|
||||
r2.setValidated(true);
|
||||
r2.setValidationDate(validationDate);
|
||||
}
|
||||
r1.setCollectedfrom(COLLECTED_FROM_CLAIM);
|
||||
r1.setSource(sourceId);
|
||||
r1.setTarget(targetId);
|
||||
r1.setDataInfo(DATA_INFO_CLAIM);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
r2.setCollectedfrom(COLLECTED_FROM_CLAIM);
|
||||
r2.setSource(targetId);
|
||||
r2.setTarget(sourceId);
|
||||
r2.setDataInfo(DATA_INFO_CLAIM);
|
||||
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
Relation r1 = prepareRelation(sourceId, targetId, validationDate);
|
||||
Relation r2 = prepareRelation(targetId, sourceId, validationDate);
|
||||
|
||||
final String semantics = rs.getString("semantics");
|
||||
|
||||
switch (semantics) {
|
||||
case "resultResult_relationship_isRelatedTo":
|
||||
r1.setRelType(RESULT_RESULT);
|
||||
r1.setSubRelType(RELATIONSHIP);
|
||||
r1.setRelClass(IS_RELATED_TO);
|
||||
|
||||
r2.setRelType(RESULT_RESULT);
|
||||
r2.setSubRelType(RELATIONSHIP);
|
||||
r2.setRelClass(IS_RELATED_TO);
|
||||
r1 = setRelationSemantic(r1, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
|
||||
r2 = setRelationSemantic(r2, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
|
||||
break;
|
||||
case "resultProject_outcome_produces":
|
||||
if (!"project".equals(sourceType)) {
|
||||
|
@ -521,13 +498,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
"invalid claim, sourceId: %s, targetId: %s, semantics: %s",
|
||||
sourceId, targetId, semantics));
|
||||
}
|
||||
r1.setRelType(RESULT_PROJECT);
|
||||
r1.setSubRelType(OUTCOME);
|
||||
r1.setRelClass(PRODUCES);
|
||||
|
||||
r2.setRelType(RESULT_PROJECT);
|
||||
r2.setSubRelType(OUTCOME);
|
||||
r2.setRelClass(IS_PRODUCED_BY);
|
||||
r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
|
||||
r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
|
||||
break;
|
||||
case "resultResult_publicationDataset_isRelatedTo":
|
||||
r1 = setRelationSemantic(r1, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
|
||||
r2 = setRelationSemantic(r2, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("claim semantics not managed: " + semantics);
|
||||
|
@ -540,6 +516,27 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
}
|
||||
}
|
||||
|
||||
private Relation prepareRelation(String sourceId, String targetId, String validationDate) {
|
||||
Relation r = new Relation();
|
||||
if (StringUtils.isNotBlank(validationDate)) {
|
||||
r.setValidated(true);
|
||||
r.setValidationDate(validationDate);
|
||||
}
|
||||
r.setCollectedfrom(COLLECTED_FROM_CLAIM);
|
||||
r.setSource(sourceId);
|
||||
r.setTarget(targetId);
|
||||
r.setDataInfo(DATA_INFO_CLAIM);
|
||||
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
return r;
|
||||
}
|
||||
|
||||
private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) {
|
||||
r.setRelType(relType);
|
||||
r.setSubRelType(subRelType);
|
||||
r.setRelClass(relClass);
|
||||
return r;
|
||||
}
|
||||
|
||||
private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
|
||||
final Context context = new Context();
|
||||
context.setId(id);
|
||||
|
|
Loading…
Reference in New Issue