Merge branch 'stable_ids' of https://code-repo.d4science.org/D-Net/dnet-hadoop into stable_ids

This commit is contained in:
Miriam Baglioni 2021-07-08 18:57:25 +02:00
commit 6e987fc084
14 changed files with 2200 additions and 58 deletions

View File

@ -131,18 +131,9 @@ public class HttpConnector2 {
} }
return attemptDownload(newUrl, retryNumber + 1, report); return attemptDownload(newUrl, retryNumber + 1, report);
} }
if (is4xx(urlConn.getResponseCode())) { if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) {
// CLIENT ERROR, DO NOT RETRY
report
.put(
REPORT_PREFIX + urlConn.getResponseCode(),
String
.format(
"%s error: %s", requestUrl, urlConn.getResponseMessage()));
throw new CollectorException("4xx error: request will not be repeated. " + report);
}
if (is5xx(urlConn.getResponseCode())) {
switch (urlConn.getResponseCode()) { switch (urlConn.getResponseCode()) {
case HttpURLConnection.HTTP_NOT_FOUND:
case HttpURLConnection.HTTP_BAD_GATEWAY: case HttpURLConnection.HTTP_BAD_GATEWAY:
case HttpURLConnection.HTTP_UNAVAILABLE: case HttpURLConnection.HTTP_UNAVAILABLE:
case HttpURLConnection.HTTP_GATEWAY_TIMEOUT: case HttpURLConnection.HTTP_GATEWAY_TIMEOUT:

View File

@ -21,6 +21,9 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
public class OaiCollectorPlugin implements CollectorPlugin { public class OaiCollectorPlugin implements CollectorPlugin {
public static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}";
public static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z";
private static final String FORMAT_PARAM = "format"; private static final String FORMAT_PARAM = "format";
private static final String OAI_SET_PARAM = "set"; private static final String OAI_SET_PARAM = "set";
private static final Object OAI_FROM_DATE_PARAM = "fromDate"; private static final Object OAI_FROM_DATE_PARAM = "fromDate";
@ -62,11 +65,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
throw new CollectorException("Param 'mdFormat' is null or empty"); throw new CollectorException("Param 'mdFormat' is null or empty");
} }
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
} }
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
} }

View File

@ -107,10 +107,12 @@ public class OaiIterator implements Iterator<String> {
if (set != null && !set.isEmpty()) { if (set != null && !set.isEmpty()) {
url += "&set=" + URLEncoder.encode(set, "UTF-8"); url += "&set=" + URLEncoder.encode(set, "UTF-8");
} }
if (fromDate != null && fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
} }
if (untilDate != null && untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
} }
log.info("Start harvesting using url: " + url); log.info("Start harvesting using url: " + url);

View File

@ -0,0 +1,143 @@
// from PROD 2021-07-06 , tf script of HAL with around 3mill. records
declare_script "dc_cleaning_OpenAIREplus_compliant_hal";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
declare_ns oai = "http://www.openarchives.org/OAI/2.0/";
declare_ns xs = "http://www.w3.org/2001/XMLSchema";
$var0 = "''";
$varFP7 = "'corda_______::'";
$varH2020 = "'corda__h2020::'";
$varDummy = "''";
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varRepoid = xpath:"//dri:repositoryId";
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
//
// communities - deactivated until received green light from DARIAH to mark community on prod also
// $varCommunity = xpath:"//*[local-name()='setSpec'][starts-with(., 'collection:DARIAH')]/'dariah'";
// concept should not appear with empty attribute id, i.e when there is no community - ugly, but seems to work (oaf:datasourceprefix = just any field available in all records)
// oaf:concept = set(xpath:"//oaf:datasourceprefix[string-length($varCommunity) gt 0]/''", @id = $varCommunity;);
//
// apply xpath:"//dc:contributor[starts-with(., 'European Project')]" if xpath:"string-length(replace(., '.*(\d{6,6}).*', '$1')) = 6" oaf:projectid = xpath:"concat($var1, replace(., '.*(\d{6,6}).*', '$1'))"; else $varDummy = "''";
apply xpath:"//dc:creator" if xpath:"string-length(.) &gt; 0 and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
if xpath:"//dc:title[string-length(.)&gt; 0]" $varDummy = "''"; else dc:coverage = skipRecord();
dc:title = xpath:"//dc:title[string-length(.) &gt; 0]/normalize-space(.)";
apply xpath:"//dc:subject" if xpath:"string-length(.) &gt; 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:publisher" if xpath:"string-length(.) &gt; 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:source" if xpath:"string-length(.) &gt; 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:contributor = xpath:"//dc:contributor";
// dc:description = xpath:"//dc:description/normalize-space(.)";
//dc:description = xpath:"string-join(//dc:description/normalize-space(.), concat('; ',codepoints-to-string(10)))";
dc:description = xpath:"string-join(//dc:description/normalize-space(.), '; ')";
dc:format = xpath:"//dc:format";
$varHttpTest = "''";
oaf:fulltext = xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))]";
//if xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))] or //dc:relation[starts-with(lower-case(normalize-space(.)), 'info:eu-repo/grantagreement')] or //dc:rights[starts-with(lower-case(normalize-space(.)), 'open') or contains(lower-case(normalize-space(.)), 'openaccess')] or //dc:accessRights[contains(lower-case(normalize-space(.)), 'openaccess')]" $var0 = "''"; else dc:coverage = skipRecord();
if xpath:"//dc:identifier[starts-with(., 'http')]" $var0 = "''"; else dc:coverage = skipRecord();
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
static dr:dateOfTransformation = xpath:"current-dateTime()";
dc:type = xpath:"//dc:type";
dc:format = xpath:"//dc:format";
dc:date = xpath:"//dc:date";
dc:language = Convert(xpath:"//dc:language", Languages);
$varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
if xpath:"starts-with($varDateAccepted, '0')" oaf:dateAccepted = $varDummy; else oaf:dateAccepted = $varDateAccepted;
$varEmbargoEnd = xpath:"//dc:date[matches(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', 'i')][contains(lower-case(.), 'info:eu-repo')]/replace(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', '$3', 'i')";
oaf:embargoenddate = $varEmbargoEnd;
// FP7
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
// H2020
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2012][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
// H2020 workaround for HAL
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement//)(\d\d\d\d\d\d)(.*)', 'i')][//dc:contributor[contains(lower-case(.), 'h2020')]][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2012][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement//)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
dc:relation = xpath:"//dc:relation";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
//
oaf:collectedDatasourceid = xpath:"$varDatasourceid";
//
//if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type) | //oai:setSpec", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies);
$varCobjCategoryReverse = Convert(xpath:"insert-before(reverse(//dc:type) , 0, reverse(//oai:setSpec))", TextTypologies);
$varSuperTypeReverse = Convert(xpath:"normalize-space($varCobjCategoryReverse)", SuperTypes);
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_']/$varCobjCategoryReverse", @type = $varSuperTypeReverse;);
$varCobjCategoryStraight = Convert(xpath:"insert-before(//dc:type , 100, //oai:setSpec)", TextTypologies);
$varSuperTypeStraight = Convert(xpath:"normalize-space($varCobjCategoryStraight)", SuperTypes);
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other'))]/$varCobjCategoryStraight", @type = $varSuperTypeStraight;);
//
// review level
// oaf:refereed = Convert(xpath:"//dc:description", ReviewLevels);
$varRefereedConvt = Convert(xpath:"(//dc:type, //oai:setSpec, //dc:description)", ReviewLevels);
$varRefereedDesct = xpath:"(//dc:description[matches(lower-case(.), '.*(this\s*book|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*')]/'0001', //dc:description[matches(., '^version\s*(préliminaire.*|0$)')]/'0002')";
$varRefereedIdntf = xpath:"(//*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)])pre[\.\-_/\s\(\)]?prints?([\.\-_/\s\(\)].*)?$')][count(//dc:identifier) = 1]/'0002', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)])refereed([\.\-_/\s\(\)\d].*)?$')]/'0001', //*[string(node-name(.)) = 'dc:identifier' and contains(lower-case(.), '-peer-reviewed-article-')]/'0001')";
$varRefereed = xpath:"($varRefereedConvt, $varRefereedIdntf, $varRefereedDesct)";
if xpath:"count(index-of($varRefereed, '0001')) &gt;0" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
if xpath:"count(index-of($varRefereed, '0002')) &gt;0 and count(index-of($varRefereed, '0001')) = 0" oaf:refereed = xpath:"'0002'"; else $varDummy= "''";
//
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') and (xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') " oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
//2021-06-01 ; acz ; next line to avoid to be OPEN as default, set to UNKNOWN , 2021-07-05 acz
//if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())]" $var0 = "''"; else oaf:accessrights = "UNKNOWN";
oaf:license = xpath:"//dc:rights[starts-with(., 'http') or matches(., '^CC[- ]BY([- ](NC([- ](ND|SA))?|ND|SA))([- ]\d(\.\d)?)?$', 'i')]";
//
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
//
//$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
$varIdDoi = identifierExtract('["//dc:identifier[starts-with(., \"info:\") or starts-with(., \"urn:\") or starts-with(., \"doi:\") or starts-with(., \"DOI:\") or starts-with(., \"Doi:\") or starts-with(., \"doi \") or starts-with(., \"DOI \") or starts-with(., \"Doi \") or starts-with(., \"10.\") or ((starts-with(., \"http\")) and contains(., \"doi.org/10.\"))]", "//dc:relation[starts-with(., \"info:eu-repo/semantics/altIdentifier/doi/10.\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/DOI/10.\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/Doi/10.\") or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/doi/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/DOI/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/Doi/http\")) and contains(., \"doi.org/10.\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
$varIdHdl = identifierExtract('["//dc:identifier[starts-with(., \"HDL:\") and not(starts-with(., \"HDL: http\"))][not(contains(., \"123456789\"))]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/hdl/\") or (starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/url/\") and contains(., \"://hdl.handle.net/\"))]"]' , xpath:"./*[local-name()='record']" , '(?!(info:hdl:|://hdl.handle.net/|info:eu-repo/semantics/altIdentifier/hdl/))(\d.*)');
$varIdIsbn = xpath:"(//dc:identifier, //dc:source)[starts-with(lower-case(.), 'isbn') or starts-with(., '978') or starts-with(., '979')][(matches(., '(isbn[:\s]*)?97[89]-\d+-\d+-\d+-\d+$', 'i') and string-length(concat('97', substring-after(., '97'))) = 17) or matches(., '(isbn[:\s]*)?97[89]\d{10}$', 'i')]/replace(., 'isbn[:\s]*', '', 'i'), //dc:relation[starts-with(lower-case(.), 'info:eu-repo/semantics/altidentifier/isbn/')][(matches(., 'info:eu-repo/semantics/altIdentifier/isbn/97[89]-\d+-\d+-\d+-\d+$', 'i') and string-length(.) = 59) or matches(., 'info:eu-repo/semantics/altidentifier/isbn/97[89]\d{10}$', 'i')]/substring-after(lower-case(.), 'info:eu-repo/semantics/altidentifier/isbn/')";
$varIdBibc = identifierExtract('["//dc:identifier[starts-with(., \"BibCode:\") or starts-with(., \"BIBCODE:\") or (starts-with(., \"http:\") and contains(., \"bibcode=\"))]"]' , xpath:"./*[local-name()='record']" , '(^(BibCode:|BIBCODE:|http).*$)');
$varIdPtnt = identifierExtract('["//dc:identifier[starts-with(., \"Patent N°:\")]"]' , xpath:"./*[local-name()='record']" , '(^Patent N°:.*$)');
$varPmId = identifierExtract('["//dc:identifier[starts-with(normalize-space(.), \"PUBMED:\")]"]' , xpath:"./*[local-name()='record']" , '(?!PUBMED: )(\d+)');
$varIdPmc = identifierExtract('["//dc:identifier[starts-with(., \"PUBMEDCENTRAL:\") or (starts-with(., \"http\") and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\"))]", "//dc:relation[starts-with(., \"info:eu-repo/semantics/altIdentifier/pmid/PMC\") or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/pmid/http\")) and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\"))]"]' , xpath:"./*[local-name()='record']" , '(PMC\d+)');
//$varIdHal = identifierExtract('["//dc:identifier[starts-with(., \"ads-\") or starts-with(., \"anses-\") or starts-with(., \"artxibo-\") or starts-with(., \"bioemco-\") or starts-with(., \"cea-\") or starts-with(., \"cel-\") or starts-with(., \"cirad-\") or starts-with(., \"edutice-\") or starts-with(., \"emse-\") or starts-with(., \"EMSE-\") or starts-with(., \"ensl-\") or starts-with(., \"hal-\") or starts-with(., \"HAL-\") or starts-with(., \"halsde-\") or starts-with(., \"halshs-\") or starts-with(., \"hprints-\") or starts-with(., \"in2p3-\") or starts-with(., \"ineris-\") or starts-with(., \"inria-\") or starts-with(., \"Inria-\") or starts-with(., \"inserm-\") or starts-with(., \"insu-\") or starts-with(., \"INSU-\") or starts-with(., \"ird-\") or starts-with(., \"irsn-\") or starts-with(., \"jpa-\") or starts-with(., \"lirmm-\") or starts-with(., \"medihal-\") or starts-with(., \"meteo-\") or starts-with(., \"mnhn-\") or starts-with(., \"obspm-\") or starts-with(., \"pastel-\") or starts-with(., \"pasteur-\") or starts-with(., \"Pasteur-\") or starts-with(., \"peer-\") or starts-with(., \"ssa-\") or starts-with(., \"tel-\") or starts-with(., \"ujm-\") or starts-with(., \"ijn_\") or starts-with(., \"sic_\") or (starts-with(., \"http\") and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\") or contains(., \"://medihal.archives-ouvertes.fr/hal\")))]", "//dc:relation[((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\")) and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\")))]"]' , xpath:"./*[local-name()='record']" , '((ads|anses|artxibo|bioemco|cea|cel|cirad|edutice|emse|EMSE|ensl|hal|HAL|halsde|halshs|hprints|in2p3|ineris|inria|Inria|inserm|insu|INSU|ird|irsn|jpa|lirmm|medihal|meteo|mnhn|obspm|pastel|pasteur|Pasteur|peer|ssa|tel|ujm)-|(ijn|sic)_).*');
$varIdHal = identifierExtract('["//*[local-name() = \"recordIdentifier\"]"]' , xpath:"./*[local-name()='record']" , '(oai:HAL:.*)');
$varIdArxv = identifierExtract('["//dc:identifier[((starts-with(., \"http\") or starts-with(., \"ArXiv: http\")) and (contains(., \"://arxiv.org/abs/\") or contains(., \"://arxiv.org/pdf/\"))) or starts-with(., \"arXiv:\") or starts-with(., \"ARXIV:\")]", "//dc:relation[(starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/arxiv/\") and not(contains(., \"/arxiv/http\"))) or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/arxiv/http\")) and (contains(., \"://arxiv.org/abs/\") or contains(., \"://arxiv.org/pdf/\")))]"]' , xpath:"./*[local-name()='record']" , '(?!(://arxiv.org/abs/|:eu-repo/semantics/altIdentifier/arxiv/))([a-zA-Z].*)');
$varIdWos = identifierExtract('["//dc:identifier[starts-with(., \"WOS:\") or starts-with(., \"wos: WOS:\")]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/wos/\")]"]' , xpath:"./*[local-name()='record']" , '(info.*|WOS:.+|wos: WOS:.+)');
//oaf:identifier = set(xpath:"$varId//value[not[. = '10.1145/nnnnnnn.nnnnnnn']]", @identifierType = "doi";);
oaf:identifier = set(xpath:"$varIdDoi//value[not(. = '10.1145/nnnnnnn.nnnnnnn')]", @identifierType = "doi";);
oaf:identifier = set(xpath:"$varIdHdl//value", @identifierType = "handle";);
oaf:identifier = set(xpath:"$varIdIsbn", @identifierType = "isbn";);
oaf:identifier = set(xpath:"($varIdBibc//value[not(starts-with(., 'http'))]/replace(., 'BIBCODE:\s*', ''), $varIdBibc//value[starts-with(., 'http') and contains(substring-after(., 'bibcode='), codepoints-to-string(38))]/substring-before(substring-after(., 'bibcode='), codepoints-to-string(38)), $varIdBibc//value[starts-with(., 'http') and not(contains(substring-after(., 'bibcode='), codepoints-to-string(38)))]/substring-after(., 'bibcode='))", @identifierType = "bibcode";);
oaf:identifier = set(xpath:"$varIdPtnt//value/normalize-space(substring-after(., 'Patent N°:'))", @identifierType = "patentNumber";);
oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";);
oaf:identifier = set(xpath:"$varIdPmc//value", @identifierType = "pmcid";);
//oaf:identifier = set(xpath:"distinct-values($varIdHal//value/replace(., '(/document|/image|/file/.*)$', ''))", @identifierType = "hal";);
oaf:identifier = set(xpath:"distinct-values($varIdHal//value/replace(substring-after(., 'oai:HAL:'), '(v\d*)$', ''))", @identifierType = "hal";);
oaf:identifier = set(xpath:"distinct-values(($varIdArxv//value/normalize-space(replace(., '(https?://arxiv.org/abs/|https?://arxiv.org/pdf/|info:eu-repo/semantics/altIdentifier/arxiv/|info:eu-repo/semantics/altIdentifier/url/|info:eu-repo/semantics/altIdentifier/urn/|arXiv:|\.pdf)', '', 'i'))))", @identifierType = "arxiv";);
oaf:identifier = set(xpath:"$varIdWos//value/normalize-space(replace(., '(info:eu-repo/semantics/altIdentifier/wos/|WOS:|wos:)', ''))", @identifierType = "wos";);
oaf:identifier = set(xpath:"distinct-values(//dc:identifier[starts-with(., 'http') and contains(., $varIdHal//value/replace(substring-after(., 'oai:HAL:'), '(v\d*)$', ''))]/replace(., '(/document|/image|/file/.*)$', ''))", @identifierType = "landingPage";);
oaf:identifier = set(xpath:"distinct-values(//dc:identifier[starts-with(., 'http') and not(ends-with(., $varIdHal//value/replace(substring-after(., 'oai:HAL:'), '(v\d*)$', '')))])", @identifierType = "url";);
oaf:identifier = set(xpath:"//dri:recordIdentifier", @identifierType = "oai-original";);
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
// journal data
// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the ISSN (as e.g. print/online/...)
$varISSN = xpath:"//dc:source[starts-with(., 'ISSN:') and string-length(.) &gt; 12]/concat(substring(normalize-space(substring-after(., 'ISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'ISSN:')), 1, 4))))";
//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) &gt; 13]/normalize-space(substring-after(., 'ISSN:'))";
$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) &gt; 13]/concat(substring(normalize-space(substring-after(., 'EISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'EISSN:')), 1, 4))))";
oaf:journal = set(xpath:"//oaf:datasourceprefix[$varISSN or $varEISSN]/''", @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";);
end

View File

@ -0,0 +1,140 @@
// from PROD 2021-07-06 , tf script of DOAJ with more than 6mill. records
declare_script "dc_cleaning_OpenAIREplus_compliant_doaj";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
$var0 = "''";
$varFP7 = "'corda_______::'";
$varH2020 = "'corda__h2020::'";
$varDummy = "''";
// $varUnknownRepoId = "'openaire____::55045bd2a65019fd8e6741a755395c8c'";
//
$varUnknownRepoId = "'openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18'";
$varUnknownRepoName = "'Unknown Repository'";
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varRepoid = xpath:"//dri:repositoryId";
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
if xpath:"//dc:creator[string-length(normalize-space(.)) &amp;gt; 0][contains(., 'CDATA')][starts-with(normalize-space(.), '(')][starts-with(normalize-space(.), '.')]" dc:creator = skipRecord(); else $varDummy = "''";
//apply xpath:"//dc:creator" if xpath:"string-length(normalize-space(.)) &amp;amp;gt; 0 and not(contains(., 'CDATA')) and not(starts-with(normalize-space(.), '.')) and not(starts-with(normalize-space(.), '('))" dc:creator = Convert(xpath:".", Person); else $varDummy = "''";
if xpath:"count(//dc:creator) = 0" dc:creator = skipRecord(); else $varDummy = "''";
//apply xpath:"//dc:creator" if xpath:"string-length(.) &amp;gt; 0 and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
$varOrcidName = xpath:"//dc:creator[string-length(normalize-space(.)) &gt; 0]";
$varOrcidOrcid = xpath:"//dc:creator[string-length(normalize-space(.)) &gt; 0]/@id/replace(., 'https?://orcid.org/', '')";
dc:creator = set(xpath:"$varOrcidName", @nameIdentifier = xpath:"subsequence($varOrcidOrcid,position(),1)";, @nameIdentifierScheme=xpath:"replace(subsequence($varOrcidOrcid,position(),1),'^.+$','ORCID')";, @schemeUri=xpath:"replace(subsequence($varOrcidOrcid,position(),1),'^.+$','http://orcid.org/')";);
if xpath:"count(//dc:title[string-length(.) &amp;gt; 0]) = 0" dc:title = skipRecord(); else $varDummy = "''";
dc:title = xpath:"//dc:title/normalize-space(replace(., '^(&amp;lt;title language=)(.)*(&amp;gt;)', ''))";
// apply xpath:"//dc:title" if xpath:"string-length(.) &amp;gt; 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:subject" if xpath:"string-length(.) &amp;gt; 0 and not(@xsi:type = 'dcterms:LCSH')" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:subject = set(xpath:"//dc:subject[@xsi:type = 'dcterms:LCSH']/concat('lcsh:', .)", @classid=xpath:"'lcsh'";, @classname=xpath:"'lcsh'";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
apply xpath:"//dc:publisher" if xpath:"string-length(.) &amp;gt; 0" dc:publisher = xpath:"normalize-space(replace(., '(&amp;lt;br&amp;gt;)', ''))"; else $varDummy = "''";
apply xpath:"//dc:source" if xpath:"string-length(.) &amp;gt; 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:contributor = xpath:"//dc:contributor";
dc:description = xpath:"//dc:description[not(starts-with(., 'URN: urn:nbn:') or starts-with(., 'URN: http'))]";
dc:format = xpath:"//dc:format";
$varHttpTest = "''";
if xpath:"//dc:relation[starts-with(., 'http') or starts-with(., 'www.')]" $varHttpTest = "true"; else dc:identifier = skipRecord();
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'www.')" dc:identifier = xpath:"concat('http://', normalize-space(.))"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
dr:CobjIdentifier = xpath:"distinct-values(//dc:identifier[not(starts-with(normalize-space(.), 'http'))][not(normalize-space(.) = ($varIdList))][not(starts-with(normalize-space(.), 'urn:nbn:') or starts-with(normalize-space(.), 'URN:NBN:'))][not(. = ($varISSN[1], $varISSN[2]))][normalize-space(.) != ''])";
dc:identifier = xpath:"($varIdUrl//value[not(starts-with(., 'www'))], $varIdUrl//value[starts-with(., 'www')]/concat('http://', .), $varIdLdpg//value, $varIdDoi//value)[1]";
dc:relation = xpath:"//dc:relation[starts-with(., 'https://doaj.org/toc/')]";
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
static dr:dateOfTransformation = xpath:"current-dateTime()";
// dc:type = xpath:"//dc:type";
dc:language = Convert(xpath:"//dc:language", Languages);
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
dc:date = xpath:"//dc:date";
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
//apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
//
oaf:collectedDatasourceid = $varDatasourceid;
//
// apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
//dr:CobjCategory = "0001";
$varCobjCategory = Convert(xpath:"//dc:type", TextTypologies);
$varSuperType = Convert(xpath:"normalize-space($varCobjCategory)", SuperTypes);
dr:CobjCategory = set($varCobjCategory, @type = $varSuperType;);
dc:type = xpath:"//dc:type";
//
// review status
$varRefereedIdntf = xpath:"(//*[string(node-name(.)) = 'dc:identifier' and matches(., '^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$')]/'0001', //*[string(node-name(.)) = 'dc:relation' and matches(., '^info:eu-repo/semantics/altIdentifier/doi/10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$', 'i')]/'0001')";
$varRefereedProse = xpath:"(//*[string(node-name(.)) = 'dc:description' and matches(lower-case(.), '.*this\s*preprint\s*has\s*been\s*reviewed\s*and\s*recommended\s*by\s*peer\s*community') and contains(., '10.24072/')]/'0001', //dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001')";
$varRefereedReltn = xpath:"(//dc:relation, //dc:identifier)[contains(., '://www.dovepress.com/') and matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001'";
$varRefereedTitle = xpath:"//dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001'";
$varRefereedDesct = xpath:"(//dc:description[matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001')";
$varRefereed = xpath:"($varRefereedIdntf, $varRefereedProse, $varRefereedReltn, $varRefereedTitle, $varRefereedDesct)";
//if xpath:"$varRefereed" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
if xpath:"count(index-of($varRefereed, '0001')) &gt;0" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
if xpath:"count(index-of($varRefereed, '0002')) &gt;0 and count(index-of($varRefereed, '0001')) = 0" oaf:refereed = xpath:"'0002'"; else $varDummy= "''";
//
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
oaf:license = xpath:"(//dc:rights, //dc:relation)[starts-with(normalize-space(.), 'http') and (contains(., '/licenses/') or contains(., '/licence/') or contains(., '/licencias/') or contains(., '/licencia/') or contains(., '://creativecommons.org/') or contains(., '://rightsstatements.org/')) or matches(., '^CC[- ]BY([- ](NC([- ](ND|SA))?|ND|SA))([- ]\d(\.\d)?)?$', 'i')][not(contains(normalize-space(.), ' '))]/normalize-space(.)";
//
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
//
//$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&amp;lt;&amp;gt;]*/[^\s"&amp;lt;&amp;gt;]+)');
$varIdDoi = identifierExtract('["//dc:identifier[starts-with(., \"10.\") or starts-with(., \"DOI:\") or starts-with(., \"doi:\") or (starts-with(., \"http\") and contains(., \"doi.org/\"))]", "//dc:relation[starts-with(., \"10.\") or starts-with(., \"DOI:\") or starts-with(., \"doi:\") or (starts-with(., \"http\") and contains(., \"doi.org/\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
$varIdHdl = identifierExtract('["//dc:relation[starts-with(., \"http\") and contains(., \"://hdl.handle.net/\")][not(contains(., \"123456789\"))]"]' , xpath:"./*[local-name()='record']" , '(?!(://hdl.handle.net/))(\d.*)');
$varIdUrn = identifierExtract('["//dc:relation[starts-with(., \"urn:nbn:\") or starts-with(., \"URN:NBN:\") or (starts-with(., \"http\") and (contains(., \"://nbn-resolving.org/urn:nbn:\") or contains(., \"://nbn-resolving.de/urn/resolver.pl?urn:nbn:\") or contains(., \"://nbn-resolving.de/urn:nbn:\") or contains(., \"://resolver.obvsg.at/urn:nbn:\") or contains(., \"://urn.fi/URN:NBN:\") or contains(., \"://urn.kb.se/resolve?urn=urn:nbn:\")))]", "//dc:description[contains(., \"URN: urn:nbn:de:0114-\") or contains(., \"URN: http://nbn-resolving.de/urn:nbn:de:0114-\") or (contains(., \"URN:NBN:no-\") and //dc:identifier = \"1893-1774\")]"]' , xpath:"./*[local-name()='record']" , '((urn:nbn:|URN:NBN:).*)');
$varIdArk = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"http\") and contains(., \"/ark:\")]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
$varIdPmid = identifierExtract('["//dc:relation[starts-with(., \"http\") and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
$varIdPmc = identifierExtract('["//dc:relation[starts-with(., \"http\") and (contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\") or contains(., \"//europepmc.org/articles/PMC\"))]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
$varIdHal = identifierExtract('["//dc:relation[starts-with(., \"hal-\") or starts-with(., \"halshs-\") or starts-with(., \"halsde-\") or (starts-with(., \"http\") and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\")))]"]' , xpath:"./*[local-name()='record']" , '(hal(shs|sde)?-.*)');
$varIdArxv = identifierExtract('["//dc:relation[starts-with(., \"http\") and (contains(., \"://arxiv.org/pdf/\") or contains(., \"://arxiv.org/abs/\"))]"]' , xpath:"./*[local-name()='record']" , '(\d.*)');
$varIdLdpg = identifierExtract('["//dc:identifier[starts-with(., \"https://doaj.org/article/\")]"]', xpath:"./*[local-name()='record']" , '(http.*)');
$varIdUrl = identifierExtract('["//dc:relation[starts-with(., \"http\")][not(contains(., \"://doaj.org\"))][not(contains(., \"doi.org/\"))][not(contains(., \"hdl.handle.net/\"))][not(contains(., \"://nbn-resolving.de/\") or contains(., \"://nbn-resolving.org/\") or contains(., \"://resolver.obvsg.at/\") or contains(., \"://urn.fi/URN:NBN:\") or contains(., \"://urn.kb.se/resolve\"))][not(contains(., \"://arxiv.org/pdf/\") or contains(., \"://arxiv.org/abs/\"))][not(contains(., \"://localhost/\") or contains(., \"://localhost:\"))]", "//dc:relation[starts-with(., \"www\")]"]', xpath:"./*[local-name()='record']" , '((http|www).*)');
$varIdList = xpath:"(($varIdDoi//value, $varIdHdl//value, $varIdUrn//value, $varIdArk//value, $varIdPmid//value, $varIdPmc//value, $varIdLdpg//value, $varIdUrl//value))";
// dropping/cleaning wrong DOIs, as
// 2 DOIs just different in 1 ending with . (mostly, but not exclusively, prefixed with 10.5216)
// noise stemming from odd/wrong DOI statements' formats
// DOIs with 2 prefixes
// DOI statements containing first the DOI prefix and then the DOI incl. the resolver prefix
//oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
//oaf:identifier = set(xpath:"$varIdDoi//value", @identifierType = "doi";);
oaf:identifier = set(xpath:"distinct-values(($varIdDoi//value[not(ends-with(., '.') and exists(index-of($varIdDoi//value, substring(., 1, string-length(.)-1))))][not(. = '10.4313/article-4')][not(lower-case(.) = ('10.30659/ijibe.2.1.171-181', '10.30659/ijibe.2.1.171', '10.26843/rencima.v8i4.149', '10.26843/rencima.v11i1.215', '10.18273/revfue.v14n2-2016002revista', '10.17061/phrp3112015', '10.21789/24222704', '10.22432/pjsr.2017.14.', '10.22432/pjsr.2017.18.02', '10.22432/pjsr.2017.18.'))][not(starts-with(., '10.1530/VAB-'))][not(starts-with(lower-case(.), '10.1155/s168761720'))][not(starts-with(., '10.15561/10.6084/') or starts-with(., '10.5935/10.19180/'))][not(starts-with(., '10.7454/jvi.v') and string-length(.) = 16)][not(starts-with(., '10.15094/0000') and string-length(.) = 16)][not(matches(., '^10\.\d*/DOI:$'))][not(starts-with(., concat(substring-before(., '/'), '/', substring-before(., '/'), '/')))][not(matches(substring-after(., '/'), '^https?://(dx.)?doi.org/.*') and starts-with(substring-after(., 'doi.org/'), substring-before(., '/')))][not(starts-with(., '10.1371/journal.') and matches(., '^10\.1371/journal\.[a-z]{4}\.\d{7}\.(eor|20050521)$'))][not(substring-before(., '/') = ('10.19183', '10.18066') and matches(., '^(10\.19183/how\.\d*\.\d*|10\.18066/revunivap\.v\d*i\d*)$'))]/lower-case(.), $varIdDoi//value[matches(substring-after(., '/'), '^https?://(dx.)?doi.org/.*') and starts-with(substring-after(., 'doi.org/'), substring-before(., '/'))]/substring-after(., 'doi.org/'), $varIdDoi//value[starts-with(., '10.1371/journal.') and matches(., '^10\.1371/journal\.[a-z]{4}\.\d{7}\.eor$')]/substring(., 1, 28), $varIdDoi//value[starts-with(., '10.15561/10.6084/') or starts-with(., '10.5935/10.19180/')]/substring-after(., '/')))", @identifierType = "doi";);
oaf:identifier = set(xpath:"distinct-values($varIdHdl//value/normalize-space(replace(., '\?locatt=view:master', '')))", @identifierType = "handle";);
oaf:identifier = set(xpath:"$varIdUrn//value", @identifierType = "urn";);
oaf:identifier = set(xpath:"distinct-values($varIdArk//value/replace(substring-after(., '/ark:'), '^/', ''))", @identifierType = "ark";);
oaf:identifier = set(xpath:"distinct-values($varIdPmid//value/replace(., 'https?://www.ncbi.nlm.nih.gov/pmc/articles/pmid/(\d+)(/.*)?', '$1'))", @identifierType = "pmid";);
oaf:identifier = set(xpath:"distinct-values($varIdPmc//value/replace(., 'https?://(www.ncbi.nlm.nih.gov/pmc|europepmc.org)/articles/(PMC\d*)([/\?].*)?', '$2'))", @identifierType = "pmcid";);
oaf:identifier = set(xpath:"distinct-values($varIdHal//value/replace(., '/document', ''))", @identifierType = "hal";);
oaf:identifier = set(xpath:"$varIdArxv//value", @identifierType = "arxiv";);
oaf:identifier = set(xpath:"$varIdLdpg//value", @identifierType = "landingPage";);
oaf:identifier = set(xpath:"($varIdUrl//value[not(starts-with(., 'www'))], $varIdUrl//value[starts-with(., 'www')]/concat('http://', .))", @identifierType = "url";);
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
//$varJournalName = xpath:"substring-before(//dc:source, ',')";
$varJournalTitle = xpath:"(//dc:source[contains(., ', Vol ')]/substring-before(., ', Vol '), //dc:source[contains(., ', Iss ')]/substring-before(., ', Iss '))[1]";
$varVol = xpath:"//dc:source[contains(., ', Vol ')][matches(., ', Vol \d+')]/replace(substring-after(., ', Vol '), '^(\d+).*$', '$1')";
$varIss = xpath:"//dc:source[contains(., ', Iss ')][matches(., ', Iss \d+')]/replace(substring-after(., ', Iss '), '^(\d+).*$', '$1')";
$varSp = xpath:"//dc:source[contains(., ', Pp ')][matches(., ', Pp \d+-\d+')]/substring-before(substring-after(., ', Pp '), '-')";
$varEp = xpath:"//dc:source[contains(., ', Pp ')][matches(., ', Pp \d+-\d+')]/replace(substring-after(substring-after(., ', Pp '), '-'), '^(\d+).*$', '$1')";
$varISSN = xpath:"//dc:identifier[string-length() = 9 and matches(., '(\d{4})-(\d{4})')][1]";
//oaf:journal = set($varJournalName, @issn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)')][1]"; , @eissn = xpath:"//dc:identifier[string-length() = 9 and matches(., '^(\d{4})-(\d{4}|\d{3}X)')][2]";);
//oaf:journal = set($varJournalName, @issn = xpath:"//dc:identifier[string-length() = 9]";);
oaf:journal = set($varJournalTitle, @issn = xpath:"//dc:identifier[string-length() = 9 and matches(., '(\d{4})-(\d{4})')][1]";, @eissn = xpath:"//dc:identifier[string-length() = 9 and matches(., '(\d{4})-(\d{4})')][2]";, @vol = xpath:"$varVol";, @iss = xpath:"$varIss";, @sp = xpath:"$varSp";, @ep = xpath:"$varEp";);
end

View File

@ -0,0 +1,492 @@
<!-- from PROD 2021-06-14 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.1"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:transformExt="http://namespace.openaire.eu/java/org.apache.commons.codec.digest.DigestUtils"
xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy"
extension-element-prefixes="transformExt TransformationFunction"
exclude-result-prefixes="transformExt TransformationFunction" >
<xsl:output indent="yes" omit-xml-declaration="yes"/>
<!--
<xsl:param name="varHostedById" select="'opendoar____::908'"/>
<xsl:param name="varHostedByName" select="'Europe PubMed Central'"/>
-->
<xsl:param name="varOfficialName" />
<xsl:param name="varDsType" />
<xsl:param name="varDataSourceId" />
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'"/>
<xsl:param name="varFP7OtherDOI" select="'10.13039/100011102'"/>
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'"/>
<xsl:param name="varFP7" select="'corda_______::'"/>
<xsl:param name="varH2020" select="'corda__h2020::'"/>
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
<xsl:param name="index" select="0"/>
<xsl:param name="transDate" select="current-dateTime()"/>
<xsl:variable name="tf" select="TransformationFunction:getInstance()"/>
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
<xsl:template name="terminate">
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<xsl:template match="/">
<record>
<xsl:apply-templates select="//*[local-name() = 'header']" />
<metadata>
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
<xsl:call-template name="terminate"/>
</xsl:if>
<!-- in journal.fi xml:lang of translated titles is not within the trans-title element but within the surrounding trans-title-group element (which just contains 1 trans-title element) -->
<!--
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()=('article-title', 'trans-title-group')][string-length(normalize-space(.))> 0]"/>
<xsl:with-param name="targetElement" select="'dc:title'"/>
</xsl:call-template>
-->
<xsl:call-template name="title">
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name()='title-group']//*[local-name()=('article-title', 'trans-title', 'subtitle', 'trans-subtitle')]"/>
</xsl:call-template>
<xsl:call-template name="authors">
<!--
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))]"/>
-->
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group'][@content-type='author']/*[local-name() = 'contrib']"/>
</xsl:call-template>
<!-- <xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
</xsl:call-template>
-->
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()=('abstract', 'trans-abstract')]"/>
<xsl:with-param name="targetElement" select="'dc:description'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']"/>
<xsl:with-param name="targetElement" select="'dc:subject'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group']//*[local-name()='kwd']"/>
<xsl:with-param name="targetElement" select="'dc:subject'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']"/>
<xsl:with-param name="targetElement" select="'dc:publisher'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
<xsl:with-param name="targetElement" select="'dc:source'"/>
</xsl:call-template>
<xsl:element name="dc:language">
<xsl:value-of select="//*[local-name()='metadata']//*[local-name()='article']/@xml:lang" />
</xsl:element>
<xsl:element name="dc:identifier">
<xsl:value-of select="//*[local-name()='article-meta']/*[local-name()='self-uri'][contains(./@xlink:href, '/view/')]/@xlink:href" />
</xsl:element>
<xsl:element name="oaf:dateAccepted">
<!--
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])" />
<xsl:value-of select="TransformationFunction:Convert($tf, //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub'], 'DateISO8601', 'yyyy-MM-dd', 'min()')" />
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/replace(concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day']), '-(\d)([-$])', '-0$1$2')" />
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
concat(./*[local-name()='year'], '-',
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']), 2), '-',
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']), 2))" />
-->
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
concat(./*[local-name()='year'], '-',
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']) idiv 2 + 1, 2), '-',
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']) idiv 2 +1, 2))" />
</xsl:element>
<xsl:for-each select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub']">
<xsl:choose>
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2] and ./*[local-name()='day' and string-length(normalize-space(.)) = 2]">
<dc:date>
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])"/>
</dc:date>
</xsl:when>
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2]">
<dc:date>
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'])"/>
</dc:date>
</xsl:when>
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4]">
<dc:date>
<xsl:value-of select="./*[local-name()='year']"/>
</dc:date>
</xsl:when>
</xsl:choose>
</xsl:for-each>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()='meta-value'], //*[local-name()='permissions']/*[local-name()='copyright-statement']"/>
<xsl:with-param name="targetElement" select="'dc:rights'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='permissions']/*[local-name()='license']/@xlink:href"/>
<xsl:with-param name="targetElement" select="'oaf:license'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']"/>
<xsl:with-param name="targetElement" select="'dc:relation'"/>
</xsl:call-template>
<xsl:call-template name="identifiers">
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']"/>
</xsl:call-template>
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri'][not(./@content-type = 'application/pdf')]/@xlink:href">
<oaf:identifier>
<xsl:attribute name="identifierType">
<xsl:text>landingPage</xsl:text>
</xsl:attribute>
<xsl:value-of select="."/>
</oaf:identifier>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri' and ./@content-type='application/pdf' and //oaf:datasourceprefix = ('ambientesust', 'qualityinedu')]/@xlink:href/replace(., '/view/', '/download/')">
<oaf:fulltext>
<xsl:value-of select="."/>
</oaf:fulltext>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI) or ends-with(., $varFP7OtherDOI)]]">
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
<oaf:projectid>
<xsl:value-of select="concat($varFP7, ./*[local-name()='award-id'])"/>
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]">
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
<oaf:projectid>
<xsl:value-of select="concat($varH2020, ./*[local-name()='award-id'])"/>
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<!-- -->
<xsl:variable name='varRights' select="distinct-values((for $i in (
//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href,
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
and not( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
and not( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())])]/'open',
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
and (( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
or ( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())]))]/'embargo')
return TransformationFunction:convertString($tf, normalize-space($i), 'AccessRights')))" />
<!--
and not((xs:date( max( (start_date, '0001-01-01') ) ) gt current-date()))
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read' and and not((xs:date( max( (./@start_date, '0001-01-01') ) ) gt current-date()))]/'open'
-->
<oaf:accessrights>
<xsl:choose>
<xsl:when test="$varRights[. = 'EMBARGO']">
<xsl:value-of select="'EMBARGO'"/>
</xsl:when>
<xsl:when test="$varRights[. != 'UNKNOWN']">
<xsl:value-of select="$varRights[. != 'UNKNOWN'][1]"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$varRights[1]"/>
</xsl:otherwise>
</xsl:choose>
</oaf:accessrights>
<!--
<oaf:accessrights>
<xsl:value-of select="$varRights[1]"/>
</oaf:accessrights>
<xsl:element name="oaf:accessrights">
<xsl:value-of select="(//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href)/TransformationFunction:convertString($tf, ., 'AccessRights')" />
</xsl:element>
-->
<!--
<xsl:element name="dr:CobjCategory">
<xsl:variable name='varCobjCategory' select="TransformationFunction:convertString($tf, //*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()='meta-value'], 'TextTypologies')" />
<xsl:variable name='varSuperType' select="TransformationFunction:convertString($tf, $varCobjCategory, 'SuperTypes')" />
<xsl:attribute name="type" select="$varSuperType"/>
<xsl:value-of select="$varCobjCategory" />
</xsl:element>
<xsl:variable name='varCobjCatLst' select="for $i in (
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article']/@article-type)
return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')" />
-->
<xsl:variable name='varTypLst' select="distinct-values((//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article']/@article-type))"/>
<xsl:variable name='varCobjCatLst' select="distinct-values((for $i in $varTypLst
return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')))" />
<xsl:variable name='varCobjSupLst' select="for $i in $varCobjCatLst
return concat($i, '###', TransformationFunction:convertString($tf, normalize-space($i), 'SuperTypes'))" />
<dr:CobjCategory>
<xsl:choose>
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))]) > 0">
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other')]) > 0">
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other')][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))]) > 0">
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0000'))]) > 0">
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0000'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:otherwise>
<xsl:attribute name="type" select="'other'"/>
<xsl:value-of select="'0000'" />
</xsl:otherwise>
</xsl:choose>
</dr:CobjCategory>
<!--
<xsl:for-each select="$varCobjSupLst">
<dc:type>
<xsl:value-of select="."/>
</dc:type>
</xsl:for-each>
-->
<xsl:for-each select="$varTypLst">
<dc:type>
<xsl:value-of select="."/>
</dc:type>
</xsl:for-each>
<!--
<xsl:for-each select="(//*[local-name()='article']/@article-type, //*[local-name() = 'custom-meta' and ./@specific-use = 'resource-type']/*[local-name() = ('meta-value', 'meta-name')])">
<dc:type>
<xsl:value-of select="."/>
</dc:type>
</xsl:for-each>
-->
<oaf:language>
<xsl:value-of select="TransformationFunction:convertString($tf, //*[local-name()='metadata']//*[local-name()='article']/@xml:lang, 'Languages')" />
</oaf:language>
<!-- review status -->
<!-- ToDo:
review status
~ ask Journal.fi to put it elsewhere
~ evaluate article-version (no example found yet)
subject/kwd:
~ handle thesauri (no example found yet)
relations:
~ handle fn (no example found yet)
-->
<!--
<xsl:variable name="varRefereedConvt" select="for $i in (
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article']/@article-type)
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
-->
<xsl:variable name="varRefereedConvt" select="for $i in ($varTypLst)
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
<xsl:variable name="varRefereedDescp" select="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]/'0001'"/>
<xsl:variable name="varRefereedSubjt" select="//*[local-name() = 'article-categories' and contains(//dri:recordIdentifier, 'oai:journal.fi')]/*[local-name() = 'subj-group' and ./@subj-group-type='heading']/*[local-name() = 'subject' and . = 'Peer reviewed articles']/'0001'"/>
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedDescp, $varRefereedSubjt)"/>
<!--
<oaf:refereed>
<xsl:value-of select="$varRefereedDescp"/>
</oaf:refereed>
<oaf:refereed>
<xsl:value-of select="$varRefereed"/>
</oaf:refereed>
<oaf:refereed>
<xsl:value-of select="count($varRefereed[. = '0001']) > 0"/>
</oaf:refereed>
-->
<xsl:choose>
<xsl:when test="count($varRefereed[. = '0001']) > 0">
<oaf:refereed>
<xsl:value-of select="'0001'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="count($varRefereed[. = '0002']) > 0">
<oaf:refereed>
<xsl:value-of select="'0002'"/>
</oaf:refereed>
</xsl:when>
</xsl:choose>
<xsl:call-template name="journal">
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']"/>
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']"/>
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']"/>
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']"/>
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']"/>
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']"/>
</xsl:call-template>
<oaf:hostedBy>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName"/>
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId"/>
</xsl:attribute>
</oaf:hostedBy>
<oaf:collectedFrom>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName"/>
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId"/>
</xsl:attribute>
</oaf:collectedFrom>
</metadata>
<xsl:copy-of select="//*[local-name() = 'about']" />
</record>
</xsl:template>
<xsl:template name="allElements">
<xsl:param name="sourceElement"/>
<xsl:param name="targetElement"/>
<xsl:for-each select="$sourceElement">
<xsl:element name="{$targetElement}">
<xsl:if test="(.[@xml:lang] or ..[@xml:lang]) and $targetElement = ('dc:title', 'dc:description', 'dc:subject')">
<xsl:attribute name="xml:lang">
<xsl:value-of select="(./@xml:lang, ../@xml:lang)[1]"/>
</xsl:attribute>
</xsl:if>
<xsl:value-of select="normalize-space(.)"/>
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template name="title">
<xsl:param name="sourceElement"/>
<xsl:for-each select="$sourceElement">
<xsl:element name="dc:title">
<xsl:if test=".[@xml:lang] or ..[@xml:lang]">
<xsl:attribute name="xml:lang">
<xsl:value-of select="(./@xml:lang, ../@xml:lang)[1]"/>
</xsl:attribute>
</xsl:if>
<xsl:value-of select="string-join((., ./following-sibling::*[local-name() = ('subtitle', 'trans-subtitle')])/normalize-space(.), ': ')"/>
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template name="journal">
<xsl:param name="journalTitle"/>
<xsl:param name="issn"/>
<xsl:param name="eissn"/>
<xsl:param name="vol"/>
<xsl:param name="issue"/>
<xsl:param name="sp"/>
<xsl:param name="ep"/>
<xsl:element name="oaf:journal">
<xsl:attribute name="issn">
<xsl:value-of select="normalize-space($issn)"/>
</xsl:attribute>
<xsl:attribute name="eissn">
<xsl:value-of select="normalize-space($eissn)"/>
</xsl:attribute>
<xsl:attribute name="vol">
<xsl:value-of select="normalize-space($vol)"/>
</xsl:attribute>
<xsl:attribute name="iss">
<xsl:value-of select="normalize-space($issue)"/>
</xsl:attribute>
<xsl:attribute name="sp">
<xsl:value-of select="normalize-space($sp)"/>
</xsl:attribute>
<xsl:attribute name="ep">
<xsl:value-of select="normalize-space($ep)"/>
</xsl:attribute>
<xsl:value-of select="normalize-space($journalTitle)"/>
</xsl:element>
</xsl:template>
<xsl:template name="identifiers">
<xsl:param name="sourceElement"/>
<xsl:if test="string-length($sourceElement[@pub-id-type='doi']) gt 0">
<xsl:element name="oaf:identifier">
<xsl:attribute name="identifierType">
<xsl:text>doi</xsl:text>
</xsl:attribute>
<xsl:value-of select="$sourceElement[@pub-id-type='doi']"/>
</xsl:element>
</xsl:if>
</xsl:template>
<xsl:template name="authors">
<xsl:param name="sourceElement"/>
<xsl:for-each select="$sourceElement">
<xsl:element name="dc:creator">
<xsl:if test="./*[local-name()='contrib-id'][@contrib-id-type='orcid']">
<xsl:attribute name="nameIdentifierScheme">
<xsl:text>ORCID</xsl:text>
</xsl:attribute>
<xsl:attribute name="schemeURI">
<xsl:text>http://orcid.org/</xsl:text>
</xsl:attribute>
<xsl:attribute name="nameIdentifier">
<xsl:value-of select="substring-after(./*[local-name()='contrib-id'][@contrib-id-type='orcid'], 'http://orcid.org/')"/>
</xsl:attribute>
</xsl:if>
<xsl:value-of select="concat(normalize-space(./*[local-name()='name']/*[local-name()='surname']), ', ', normalize-space(./*[local-name()='name']/*[local-name()='given-names']))"/>
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template match="//*[local-name() = 'header']">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
<xsl:element name="dr:dateOfTransformation">
<xsl:value-of select="$transDate"/>
</xsl:element>
</xsl:copy>
</xsl:template>
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,437 @@
<!-- from production 2021-0614 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.1"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:transformExt="http://namespace.openaire.eu/java/org.apache.commons.codec.digest.DigestUtils"
xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy"
extension-element-prefixes="transformExt TransformationFunction"
exclude-result-prefixes="transformExt TransformationFunction" >
<xsl:output indent="yes" omit-xml-declaration="yes"/>
<xsl:param name="varHostedById" select="'opendoar____::908'"/>
<xsl:param name="varHostedByName" select="'Europe PubMed Central'"/>
<xsl:param name="varOfficialName" />
<xsl:param name="varDsType" />
<xsl:param name="varDataSourceId" />
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'"/>
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'"/>
<xsl:param name="varFP7" select="'corda_______::'"/>
<xsl:param name="varH2020" select="'corda__h2020::'"/>
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
<xsl:param name="index" select="0"/>
<xsl:param name="transDate" select="current-dateTime()"/>
<xsl:variable name="tf" select="TransformationFunction:getInstance()"/>
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
<xsl:template name="terminate">
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<xsl:template match="/">
<record>
<xsl:apply-templates select="//*[local-name() = 'header']" />
<metadata>
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
<xsl:call-template name="terminate"/>
</xsl:if>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0]"/>
<xsl:with-param name="targetElement" select="'dc:title'"/>
</xsl:call-template>
<xsl:call-template name="authors">
<!--
<xsl:with-param name="sourceElement" select="//*[local-name() = 'contrib'][@contrib-type='author']"/>
-->
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))][./*[local-name()='name'] or ./*[local-name()='name-alternatives']/*[local-name()='name']][string-length(.//*[local-name()='surname']) + string-length(.//*[local-name()='given-names']) > 0]"/>
</xsl:call-template>
<!-- <xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
</xsl:call-template>
-->
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()='abstract']"/>
<xsl:with-param name="targetElement" select="'dc:description'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']"/>
<xsl:with-param name="targetElement" select="'dc:subject'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group' and not(lower-case(@kwd-group-type)=('mesh', 'ocis'))]//*[local-name()='kwd']"/>
<xsl:with-param name="targetElement" select="'dc:subject'"/>
</xsl:call-template>
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='mesh' and ./*[local-name()='kwd']]">
<xsl:for-each select="./*[local-name()='kwd']">
<dc:subject>
<xsl:attribute name="subjectScheme" select="'mesh'"/>
<xsl:attribute name="schemeURI" select="'http://www.nlm.nih.gov/mesh/'"/>
<xsl:attribute name="valueURI" select="''"/>
<xsl:value-of select="./concat('mesh:', replace(., 'mesh (.*)$', '$1'))"/>
</dc:subject>
</xsl:for-each>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='kwd-group' and lower-case(@kwd-group-type)='ocis' and ./*[local-name()='kwd']]">
<xsl:for-each select="./*[local-name()='kwd']">
<dc:subject>
<xsl:attribute name="subjectScheme" select="'ocis'"/>
<xsl:attribute name="schemeURI" select="''"/>
<xsl:attribute name="valueURI" select="''"/>
<xsl:value-of select="./concat('ocis:', .)"/>
</dc:subject>
</xsl:for-each>
</xsl:for-each>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']"/>
<xsl:with-param name="targetElement" select="'dc:publisher'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
<xsl:with-param name="targetElement" select="'dc:source'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/(*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version'], *[local-name() = 'article-version'])/concat('article-version (', @article-version-type, ') ', .)"/>
<xsl:with-param name="targetElement" select="'dc:source'"/>
</xsl:call-template>
<xsl:element name="dc:language">
<xsl:text>eng</xsl:text>
</xsl:element>
<xsl:element name="dc:identifier">
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
</xsl:element>
<xsl:element name="oaf:fulltext">
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()='article-id'][@pub-id-type='pmcid'])" />
</xsl:element>
<xsl:element name="oaf:dateAccepted">
<xsl:choose>
<xsl:when test="//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub'] or //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']" >
<xsl:if test="string(number($month)) eq 'NaN'" >
<xsl:value-of select="concat($year, '-', '01', '-', '01')" />
</xsl:if>
<xsl:if test="string(number($month)) != 'NaN'" >
<xsl:value-of select="concat($year, '-', $month, '-', '01')" />
</xsl:if>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="concat(//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='ppub']/*[local-name()='year'], '-01-01')" />
</xsl:otherwise>
</xsl:choose>
</xsl:element>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='copyright-statement'])"/>
<xsl:with-param name="targetElement" select="'dc:rights'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()='permissions']/*[local-name()='license'])"/>
<xsl:with-param name="targetElement" select="'dc:rights'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']"/>
<xsl:with-param name="targetElement" select="'dc:relation'"/>
</xsl:call-template>
<xsl:call-template name="identifiers">
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']"/>
</xsl:call-template>
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI)]]">
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
<oaf:projectid>
<xsl:value-of select="concat($varFP7, ./*[local-name()='award-id'])"/>
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]">
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
<oaf:projectid>
<xsl:value-of select="concat($varH2020, ./*[local-name()='award-id'])"/>
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<xsl:element name="oaf:accessrights">
<xsl:text>OPEN</xsl:text>
</xsl:element>
<xsl:element name="dr:CobjCategory">
<xsl:attribute name="type" select="'publication'"/>
<xsl:text>0001</xsl:text>
</xsl:element>
<dc:type>
<xsl:value-of select="//*[local-name() = 'article']/@article-type"/>
</dc:type>
<!-- custom-meta perhaps not used for types, then drop
<xsl:variable name='varTypLst' select="distinct-values((//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article']/@article-type))"/>
<xsl:variable name='varTypLst' select="//*[local-name() = 'article']/@article-type"/>
-->
<!-- perhaps ensure that file indeed exists, e.g. as pdf etc -->
<!--
// reduce load for the big PubMed records by exchanging variables with choose
<xsl:variable name="varRefereedConvt" select="for $i in distinct-values((//*[local-name() = 'article']/@article-type, //oai:setSpec))
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
<xsl:variable name="varRefereedDescp" select="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]/'0001'"/>
<xsl:variable name="varRefereedFnote" select="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = ('fn-group', 'notes')][
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*review\s*information.*') or
matches(lower-case(.), '.*the\s*peer[\.\-_/\s\(\)]*review\s*history\s*for\s*this\s*article\s*is\s*available\s*at .*') or
matches(lower-case(.), '.*provenance\s*and\s*peer[\.\-_/\s\(\)]*review.*') or
matches(lower-case(.), '.*externally\s*peer[\.\-_/\s\(\)]*reviewed.*') or
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*reviewed\s*by.*') or
matches(lower-case(.), '.*refereed\s*anonymously.*') or
matches(lower-case(.), '.*peer\s*reviewer\s*reports\s*are\s*available.*')
]/'0001'"/>
<xsl:variable name="varRefereedReviw" select="//*[local-name() = ('article-meta', 'app', 'app-group')]/*[local-name() = 'supplementary-material']/*[local-name() = 'media'][
matches(lower-case(.), '.*peer\s*review\s*file.*')]/'0001'"/>
<xsl:variable name="varRefereedReltn" select="//*[local-name() = ('related-article')][./@related-article-type = ('peer-reviewed-article', 'reviewed-article')]/'0002'"/>
<xsl:variable name="varRefereedCtRol" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']
[./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or
./*[local-name() = 'contrib'][./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or ./*[local-name() = 'role' and lower-case(.) = ('reviewer', 'solicited external reviewer')] or ./@contrib-type/lower-case(.) = 'reviewer']]/'0001'"/>
<xsl:variable name="varRefereedVersn" select="//*[local-name() = 'article-meta'][./*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version' and . = 'preprint'] or ./*[local-name() = 'article-version' and . = 'preprint']]/'0002'"/>
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedDescp, $varRefereedFnote, $varRefereedReviw, $varRefereedReltn, $varRefereedCtRol, $varRefereedVersn)"/>
<xsl:choose>
<xsl:when test="count($varRefereed[. = '0001']) > 0">
<oaf:refereed>
<xsl:value-of select="'0001'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="count($varRefereed[. = '0002']) > 0">
<oaf:refereed>
<xsl:value-of select="'0002'"/>
</oaf:refereed>
</xsl:when>
</xsl:choose>
-->
<xsl:variable name="varRefereedConvt" select="for $i in distinct-values((//*[local-name() = 'article']/@article-type, //oai:setSpec))
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
<xsl:choose>
<xsl:when test="count($varRefereedConvt[. = '0001']) > 0">
<oaf:refereed>
<xsl:value-of select="'0001'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'article-id'][@pub-id-type='doi'][matches(., '^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$')]">
<oaf:refereed>
<xsl:value-of select="'0001'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]">
<oaf:refereed>
<xsl:value-of select="'0001'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = ('fn-group', 'notes')][
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*review\s*information.*') or
matches(lower-case(.), '.*the\s*peer[\.\-_/\s\(\)]*review\s*history\s*for\s*this\s*article\s*is\s*available\s*at .*') or
matches(lower-case(.), '.*provenance\s*and\s*peer[\.\-_/\s\(\)]*review.*') or
matches(lower-case(.), '.*externally\s*peer[\.\-_/\s\(\)]*reviewed.*') or
matches(lower-case(.), '.*peer[\.\-_/\s\(\)]*reviewed\s*by.*') or
matches(lower-case(.), '.*refereed\s*anonymously.*') or
matches(lower-case(.), '.*peer\s*reviewer\s*reports\s*are\s*available.*') or
matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\].*') or
matches(lower-case(.), '.*\[.*referees\s*:.*\].*') or
matches(lower-case(.), '^\s*plagiarism[\s\-\._]check.*') or
matches(lower-case(.), '^\s*peer[\s\-\._]*review.*') or
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)reviewer.*') or
matches(lower-case(.), '^\s*(open\s*peer[\s\-\._]*|p-)review\s*reports?.*')]">
<oaf:refereed>
<xsl:value-of select="'0001'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = ('article-meta', 'app', 'app-group')]/*[local-name() = 'supplementary-material']/*[local-name() = 'media'][
matches(lower-case(.), '.*peer\s*review\s*file.*')]">
<oaf:refereed>
<xsl:value-of select="'0001'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']
[./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or
./*[local-name() = 'contrib'][./@role/lower-case(.) = ('reviewer', 'solicited external reviewer') or ./*[local-name() = 'role' and lower-case(.) = ('reviewer', 'solicited external reviewer')] or ./@contrib-type/lower-case(.) = 'reviewer']]">
<oaf:refereed>
<xsl:value-of select="'0001'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="count($varRefereedConvt[. = '0002']) > 0">
<oaf:refereed>
<xsl:value-of select="'0002'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = ('related-article')][./@related-article-type = ('peer-reviewed-article', 'reviewed-article')]">
<oaf:refereed>
<xsl:value-of select="'0002'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = 'article-meta'][./*[local-name() = 'article-version-alternatives']/*[local-name() = 'article-version' and . = 'preprint'] or ./*[local-name() = 'article-version' and . = 'preprint']]">
<oaf:refereed>
<xsl:value-of select="'0002'"/>
</oaf:refereed>
</xsl:when>
</xsl:choose>
<xsl:call-template name="journal">
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']"/>
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']"/>
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']"/>
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']"/>
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']"/>
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']"/>
</xsl:call-template>
<oaf:hostedBy>
<xsl:attribute name="name">
<xsl:value-of select="$varHostedByName"/>
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varHostedById"/>
</xsl:attribute>
</oaf:hostedBy>
<oaf:collectedFrom>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName"/>
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId"/>
</xsl:attribute>
</oaf:collectedFrom>
<xsl:for-each select="//*[local-name() = 'article']/*[local-name() = ('back', 'front')]/*[local-name() = 'fn-group']/*[local-name() = 'fn'][matches(lower-case(.), 'country(/territory)? of origin:?\s*[A-Za-z\-]+')]">
<oaf:country>
<!--
<xsl:value-of select="TransformationFunction:convertString($tf, replace(lower-case(.), '^(.|\s)*country(/territory)? of origin:?\s+([A-Za-z\-,\(\)]+(\s+[A-Za-z\-,\(\)]+)*)(.|\s)*$', '$3'), 'Countries')"/>
-->
<xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(substring(substring-after(lower-case(.), 'of origin'), 2)), 'Countries')"/>
</oaf:country>
</xsl:for-each>
</metadata>
<xsl:copy-of select="//*[local-name() = 'about']" />
</record>
</xsl:template>
<xsl:template name="allElements">
<xsl:param name="sourceElement"/>
<xsl:param name="targetElement"/>
<xsl:for-each select="$sourceElement">
<xsl:element name="{$targetElement}">
<xsl:value-of select="normalize-space(.)"/>
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template name="journal">
<xsl:param name="journalTitle"/>
<xsl:param name="issn"/>
<xsl:param name="eissn"/>
<xsl:param name="vol"/>
<xsl:param name="issue"/>
<xsl:param name="sp"/>
<xsl:param name="ep"/>
<xsl:element name="oaf:journal">
<xsl:attribute name="issn">
<xsl:value-of select="normalize-space($issn)"/>
</xsl:attribute>
<xsl:attribute name="eissn">
<xsl:value-of select="normalize-space($eissn)"/>
</xsl:attribute>
<xsl:attribute name="vol">
<xsl:value-of select="normalize-space($vol)"/>
</xsl:attribute>
<xsl:attribute name="iss">
<xsl:value-of select="normalize-space($issue)"/>
</xsl:attribute>
<xsl:attribute name="sp">
<xsl:value-of select="normalize-space($sp)"/>
</xsl:attribute>
<xsl:attribute name="ep">
<xsl:value-of select="normalize-space($ep)"/>
</xsl:attribute>
<xsl:value-of select="normalize-space($journalTitle)"/>
</xsl:element>
</xsl:template>
<xsl:template name="identifiers">
<xsl:param name="sourceElement"/>
<xsl:element name="oaf:identifier">
<xsl:attribute name="identifierType">
<xsl:text>doi</xsl:text>
</xsl:attribute>
<xsl:value-of select="$sourceElement[@pub-id-type='doi']"/>
</xsl:element>
<xsl:element name="oaf:identifier">
<xsl:attribute name="identifierType">
<xsl:text>pmc</xsl:text>
</xsl:attribute>
<xsl:value-of select="$sourceElement[@pub-id-type='pmcid']"/>
</xsl:element>
<xsl:element name="oaf:identifier">
<xsl:attribute name="identifierType">
<xsl:text>pmid</xsl:text>
</xsl:attribute>
<xsl:value-of select="$sourceElement[@pub-id-type='pmid']"/>
</xsl:element>
</xsl:template>
<xsl:template name="authors">
<xsl:param name="sourceElement"/>
<xsl:for-each select="$sourceElement">
<xsl:element name="dc:creator">
<xsl:if test="./*[local-name()='contrib-id'][@contrib-id-type='orcid']">
<xsl:attribute name="nameIdentifierScheme">
<xsl:text>ORCID</xsl:text>
</xsl:attribute>
<xsl:attribute name="schemeURI">
<xsl:text>http://orcid.org/</xsl:text>
</xsl:attribute>
<xsl:attribute name="nameIdentifier">
<xsl:value-of select="substring-after(./*[local-name()='contrib-id'][@contrib-id-type='orcid'], 'http://orcid.org/')"/>
</xsl:attribute>
</xsl:if>
<!--
<xsl:value-of select="concat(normalize-space(./*[local-name()='name']/*[local-name()='surname']), ', ', normalize-space(./*[local-name()='name']/*[local-name()='given-names']))"/>
-->
<xsl:value-of select="concat(normalize-space(./(*[local-name()='name'], *[local-name()='name-alternatives']/*[local-name()='name'])/*[local-name()='surname']), ', ', normalize-space(./(*[local-name()='name'], *[local-name()='name-alternatives']/*[local-name()='name'])/*[local-name()='given-names']))"/>
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template match="//*[local-name() = 'header']">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
<xsl:element name="dr:dateOfTransformation">
<xsl:value-of select="$transDate"/>
</xsl:element>
</xsl:copy>
</xsl:template>
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,493 @@
<!-- from PROD 2021-06-14 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
exclude-result-prefixes="xsl vocabulary dateCleaner"
version="2.0">
<!--
<xsl:param name="varHostedById" select="'opendoar____::908'"/>
<xsl:param name="varHostedByName" select="'Europe PubMed Central'"/>
-->
<xsl:param name="varOfficialName" />
<xsl:param name="varDsType" />
<xsl:param name="varDataSourceId" />
<xsl:param name="varFP7FundRefDOI" select="'10.13039/501100004963'"/>
<xsl:param name="varFP7OtherDOI" select="'10.13039/100011102'"/>
<xsl:param name="varH2020FundRefDOI" select="'10.13039/501100007601'"/>
<xsl:param name="varFP7" select="'corda_______::'"/>
<xsl:param name="varH2020" select="'corda__h2020::'"/>
<xsl:param name="epmcUrlPrefix" select="'http://europepmc.org/articles/'" />
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
<xsl:param name="index" select="0"/>
<xsl:param name="transDate" select="current-dateTime()"/>
<xsl:variable name="year" select="format-number( ( //*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='year'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='year']), '0000')" />
<xsl:variable name="month" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='month'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='month']), '00')" />
<xsl:variable name="day" select="format-number( (//*[local-name()='article-meta']//*[local-name()='pub-date'][@pub-type='epub']/*[local-name()='day'] | //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='electronic']/*[local-name()='day']), '00')" />
<xsl:template name="terminate">
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<xsl:template match="/">
<record>
<xsl:apply-templates select="//*[local-name() = 'header']" />
<metadata>
<xsl:if test="not(//*[local-name() = 'article-meta']//*[local-name()='article-title'][string-length(normalize-space(.))> 0])">
<xsl:call-template name="terminate"/>
</xsl:if>
<!-- in journal.fi xml:lang of translated titles is not within the trans-title element but within the surrounding trans-title-group element (which just contains 1 trans-title element) -->
<!--
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']//*[local-name()=('article-title', 'trans-title-group')][string-length(normalize-space(.))> 0]"/>
<xsl:with-param name="targetElement" select="'dc:title'"/>
</xsl:call-template>
-->
<xsl:call-template name="title">
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name()='title-group']//*[local-name()=('article-title', 'trans-title', 'subtitle', 'trans-subtitle')]"/>
</xsl:call-template>
<xsl:call-template name="authors">
<!--
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group']/*[local-name() = 'contrib'][@contrib-type='author'][not(exists(child::*:collab))]"/>
-->
<xsl:with-param name="sourceElement" select="//*[local-name() = 'article-meta']/*[local-name() = 'contrib-group'][@content-type='author']/*[local-name() = 'contrib']"/>
</xsl:call-template>
<!-- <xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
</xsl:call-template>
-->
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='article-meta']/*[local-name()=('abstract', 'trans-abstract')]"/>
<xsl:with-param name="targetElement" select="'dc:description'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='article-categories']//*[local-name()='subject']"/>
<xsl:with-param name="targetElement" select="'dc:subject'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='kwd-group']//*[local-name()='kwd']"/>
<xsl:with-param name="targetElement" select="'dc:subject'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='publisher']/*[local-name()='publisher-name']"/>
<xsl:with-param name="targetElement" select="'dc:publisher'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
<xsl:with-param name="targetElement" select="'dc:source'"/>
</xsl:call-template>
<xsl:element name="dc:language">
<xsl:value-of select="//*[local-name()='metadata']//*[local-name()='article']/@xml:lang" />
</xsl:element>
<xsl:element name="dc:identifier">
<xsl:value-of select="//*[local-name()='article-meta']/*[local-name()='self-uri'][contains(./@xlink:href, '/view/')]/@xlink:href" />
</xsl:element>
<xsl:element name="oaf:dateAccepted">
<!--
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])" />
<xsl:value-of select="TransformationFunction:Convert($tf, //*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub'], 'DateISO8601', 'yyyy-MM-dd', 'min()')" />
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/replace(concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day']), '-(\d)([-$])', '-0$1$2')" />
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
concat(./*[local-name()='year'], '-',
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']), 2), '-',
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']), 2))" />
-->
<xsl:value-of select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub' and string-length(normalize-space(./*[local-name()='year'])) = 4]/
concat(./*[local-name()='year'], '-',
substring(concat('0', ./*[local-name()='month'], '1'), string-length(./*[local-name()='month']) idiv 2 + 1, 2), '-',
substring(concat('0', ./*[local-name()='day'], '1'), string-length(./*[local-name()='day']) idiv 2 +1, 2))" />
</xsl:element>
<xsl:for-each select="//*[local-name()='article-meta']//*[local-name()='pub-date'][@date-type='pub' and @publication-format='epub']">
<xsl:choose>
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2] and ./*[local-name()='day' and string-length(normalize-space(.)) = 2]">
<dc:date>
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'], '-', ./*[local-name()='day'])"/>
</dc:date>
</xsl:when>
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4] and ./*[local-name()='month' and string-length(normalize-space(.)) = 2]">
<dc:date>
<xsl:value-of select="concat(./*[local-name()='year'], '-', ./*[local-name()='month'])"/>
</dc:date>
</xsl:when>
<xsl:when test="./*[local-name()='year' and string-length(normalize-space(.)) = 4]">
<dc:date>
<xsl:value-of select="./*[local-name()='year']"/>
</dc:date>
</xsl:when>
</xsl:choose>
</xsl:for-each>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()='meta-value'], //*[local-name()='permissions']/*[local-name()='copyright-statement']"/>
<xsl:with-param name="targetElement" select="'dc:rights'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='permissions']/*[local-name()='license']/@xlink:href"/>
<xsl:with-param name="targetElement" select="'oaf:license'"/>
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()='fn-group']//*[local-name()='fn']"/>
<xsl:with-param name="targetElement" select="'dc:relation'"/>
</xsl:call-template>
<xsl:call-template name="identifiers">
<xsl:with-param name="sourceElement" select="//*[local-name()='article-id']"/>
</xsl:call-template>
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri'][not(./@content-type = 'application/pdf')]/@xlink:href">
<oaf:identifier>
<xsl:attribute name="identifierType">
<xsl:text>landingPage</xsl:text>
</xsl:attribute>
<xsl:value-of select="."/>
</oaf:identifier>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='article-meta']/*[local-name()='self-uri' and ./@content-type='application/pdf' and //oaf:datasourceprefix = ('ambientesust', 'qualityinedu')]/@xlink:href/replace(., '/view/', '/download/')">
<oaf:fulltext>
<xsl:value-of select="."/>
</oaf:fulltext>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varFP7FundRefDOI) or ends-with(., $varFP7OtherDOI)]]">
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
<oaf:projectid>
<xsl:value-of select="concat($varFP7, ./*[local-name()='award-id'])"/>
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<xsl:for-each select="//*[local-name()='award-group'][.//*[local-name()='institution-id'][ends-with(., $varH2020FundRefDOI)]]">
<xsl:if test="./*[local-name()='award-id'][matches(normalize-space(.), '(^\d\d\d\d\d\d$)', 'i')]">
<oaf:projectid>
<xsl:value-of select="concat($varH2020, ./*[local-name()='award-id'])"/>
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<!-- -->
<xsl:variable name='varRights' select="distinct-values((for $i in (
//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href,
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
and not( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
and not( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())])]/'open',
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read'
and (( ./@start_date[(xs:date( max( (string(.), '0001-01-01') ) ) gt current-date())])
or ( ./@end_date[(xs:date( max( (string(.), '0001-01-01') ) ) lt current-date())]))]/'embargo')
return vocabulary:clean( normalize-space($i), 'dnet:access_modes') "
/>
<!--
and not((xs:date( max( (start_date, '0001-01-01') ) ) gt current-date()))
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'free_to_read' and and not((xs:date( max( (./@start_date, '0001-01-01') ) ) gt current-date()))]/'open'
-->
<oaf:accessrights>
<xsl:choose>
<xsl:when test="$varRights[. = 'EMBARGO']">
<xsl:value-of select="'EMBARGO'"/>
</xsl:when>
<xsl:when test="$varRights[. != 'UNKNOWN']">
<xsl:value-of select="$varRights[. != 'UNKNOWN'][1]"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$varRights[1]"/>
</xsl:otherwise>
</xsl:choose>
</oaf:accessrights>
<!--
<oaf:accessrights>
<xsl:value-of select="$varRights[1]"/>
</oaf:accessrights>
<xsl:element name="oaf:accessrights">
<xsl:value-of select="(//*[local-name()='custom-meta-group']/*[local-name()='custom-meta'][./@specific-use='access-right']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article-meta']/*[local-name() = 'permissions']/*[local-name() = 'license']/@xlink:href)/TransformationFunction:convertString($tf, ., 'AccessRights')" />
</xsl:element>
-->
<!--
<xsl:element name="dr:CobjCategory">
<xsl:variable name='varCobjCategory' select="TransformationFunction:convertString($tf, //*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()='meta-value'], 'TextTypologies')" />
<xsl:variable name='varSuperType' select="TransformationFunction:convertString($tf, $varCobjCategory, 'SuperTypes')" />
<xsl:attribute name="type" select="$varSuperType"/>
<xsl:value-of select="$varCobjCategory" />
</xsl:element>
<xsl:variable name='varCobjCatLst' select="for $i in (
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article']/@article-type)
return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')" />
-->
<xsl:variable name='varTypLst' select="distinct-values((//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article']/@article-type))"/>
<xsl:variable name='varCobjCatLst' select="distinct-values((for $i in $varTypLst
return vocabulary:clean( normalize-space($i), 'dnet:dnet:publication_resource')))" />
<xsl:variable name='varCobjSupLst' select="for $i in $varCobjCatLst
return concat($i, '###', vocabulary:clean( normalize-space($i), 'dnet:result_typologies'))" />
<dr:CobjCategory>
<xsl:choose>
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))]) > 0">
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other')]) > 0">
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-after(., '###') = 'other')][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))]) > 0">
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0000'))]) > 0">
<xsl:variable name='varCobjSup' select="$varCobjSupLst[not(substring-before(., '###') = ('0000'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')"/>
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:otherwise>
<xsl:attribute name="type" select="'other'"/>
<xsl:value-of select="'0000'" />
</xsl:otherwise>
</xsl:choose>
</dr:CobjCategory>
<!--
<xsl:for-each select="$varCobjSupLst">
<dc:type>
<xsl:value-of select="."/>
</dc:type>
</xsl:for-each>
-->
<xsl:for-each select="$varTypLst">
<dc:type>
<xsl:value-of select="."/>
</dc:type>
</xsl:for-each>
<!--
<xsl:for-each select="(//*[local-name()='article']/@article-type, //*[local-name() = 'custom-meta' and ./@specific-use = 'resource-type']/*[local-name() = ('meta-value', 'meta-name')])">
<dc:type>
<xsl:value-of select="."/>
</dc:type>
</xsl:for-each>
-->
<oaf:language>
<xsl:value-of select="vocabulary:clean( //*[local-name()='metadata']//*[local-name()='article']/@xml:lang, 'dnet:languages')" />
</oaf:language>
<!-- review status -->
<!-- ToDo:
review status
~ ask Journal.fi to put it elsewhere
~ evaluate article-version (no example found yet)
subject/kwd:
~ handle thesauri (no example found yet)
relations:
~ handle fn (no example found yet)
-->
<!--
<xsl:variable name="varRefereedConvt" select="for $i in (
//*[local-name() = 'article-meta']/*[local-name() = 'custom-meta-group']/*[local-name() = 'custom-meta'][./@specific-use='resource-type']/*[local-name()=('meta-value', 'meta-name')],
//*[local-name() = 'article']/@article-type)
return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')"/>
-->
<xsl:variable name="varRefereedConvt" select="for $i in ($varTypLst)
return vocabulary:clean( normalize-space($i), 'dnet:review_levels')"/>
<xsl:variable name="varRefereedDescp" select="//*[local-name() = 'article-meta']/*[local-name() = ('abstract', 'trans-abstract')][matches(lower-case(.), '^\s*(.p.\s*)?refereed\s*article.*')]/'0001'"/>
<xsl:variable name="varRefereedSubjt" select="//*[local-name() = 'article-categories' and contains(//dri:recordIdentifier, 'oai:journal.fi')]/*[local-name() = 'subj-group' and ./@subj-group-type='heading']/*[local-name() = 'subject' and . = 'Peer reviewed articles']/'0001'"/>
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedDescp, $varRefereedSubjt)"/>
<!--
<oaf:refereed>
<xsl:value-of select="$varRefereedDescp"/>
</oaf:refereed>
<oaf:refereed>
<xsl:value-of select="$varRefereed"/>
</oaf:refereed>
<oaf:refereed>
<xsl:value-of select="count($varRefereed[. = '0001']) > 0"/>
</oaf:refereed>
-->
<xsl:choose>
<xsl:when test="count($varRefereed[. = '0001']) > 0">
<oaf:refereed>
<xsl:value-of select="'0001'"/>
</oaf:refereed>
</xsl:when>
<xsl:when test="count($varRefereed[. = '0002']) > 0">
<oaf:refereed>
<xsl:value-of select="'0002'"/>
</oaf:refereed>
</xsl:when>
</xsl:choose>
<xsl:call-template name="journal">
<xsl:with-param name="journalTitle" select="//*[local-name()='journal-meta']//*[local-name()='journal-title']"/>
<xsl:with-param name="issn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='ppub']"/>
<xsl:with-param name="eissn" select="//*[local-name()='journal-meta']/*[local-name()='issn'][@pub-type='epub']"/>
<xsl:with-param name="vol" select="//*[local-name()='article-meta']/*[local-name()='volume']"/>
<xsl:with-param name="issue" select="//*[local-name()='article-meta']/*[local-name()='issue']"/>
<xsl:with-param name="sp" select="//*[local-name()='article-meta']/*[local-name()='fpage']"/>
<xsl:with-param name="ep" select="//*[local-name()='article-meta']/*[local-name()='lpage']"/>
</xsl:call-template>
<oaf:hostedBy>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName"/>
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId"/>
</xsl:attribute>
</oaf:hostedBy>
<oaf:collectedFrom>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName"/>
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId"/>
</xsl:attribute>
</oaf:collectedFrom>
</metadata>
<xsl:copy-of select="//*[local-name() = 'about']" />
</record>
</xsl:template>
<xsl:template name="allElements">
<xsl:param name="sourceElement"/>
<xsl:param name="targetElement"/>
<xsl:for-each select="$sourceElement">
<xsl:element name="{$targetElement}">
<xsl:if test="(.[@xml:lang] or ..[@xml:lang]) and $targetElement = ('dc:title', 'dc:description', 'dc:subject')">
<xsl:attribute name="xml:lang">
<xsl:value-of select="(./@xml:lang, ../@xml:lang)[1]"/>
</xsl:attribute>
</xsl:if>
<xsl:value-of select="normalize-space(.)"/>
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template name="title">
<xsl:param name="sourceElement"/>
<xsl:for-each select="$sourceElement">
<xsl:element name="dc:title">
<xsl:if test=".[@xml:lang] or ..[@xml:lang]">
<xsl:attribute name="xml:lang">
<xsl:value-of select="(./@xml:lang, ../@xml:lang)[1]"/>
</xsl:attribute>
</xsl:if>
<xsl:value-of select="string-join((., ./following-sibling::*[local-name() = ('subtitle', 'trans-subtitle')])/normalize-space(.), ': ')"/>
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template name="journal">
<xsl:param name="journalTitle"/>
<xsl:param name="issn"/>
<xsl:param name="eissn"/>
<xsl:param name="vol"/>
<xsl:param name="issue"/>
<xsl:param name="sp"/>
<xsl:param name="ep"/>
<xsl:element name="oaf:journal">
<xsl:attribute name="issn">
<xsl:value-of select="normalize-space($issn)"/>
</xsl:attribute>
<xsl:attribute name="eissn">
<xsl:value-of select="normalize-space($eissn)"/>
</xsl:attribute>
<xsl:attribute name="vol">
<xsl:value-of select="normalize-space($vol)"/>
</xsl:attribute>
<xsl:attribute name="iss">
<xsl:value-of select="normalize-space($issue)"/>
</xsl:attribute>
<xsl:attribute name="sp">
<xsl:value-of select="normalize-space($sp)"/>
</xsl:attribute>
<xsl:attribute name="ep">
<xsl:value-of select="normalize-space($ep)"/>
</xsl:attribute>
<xsl:value-of select="normalize-space($journalTitle)"/>
</xsl:element>
</xsl:template>
<xsl:template name="identifiers">
<xsl:param name="sourceElement"/>
<xsl:if test="string-length($sourceElement[@pub-id-type='doi']) gt 0">
<xsl:element name="oaf:identifier">
<xsl:attribute name="identifierType">
<xsl:text>doi</xsl:text>
</xsl:attribute>
<xsl:value-of select="$sourceElement[@pub-id-type='doi']"/>
</xsl:element>
</xsl:if>
</xsl:template>
<xsl:template name="authors">
<xsl:param name="sourceElement"/>
<xsl:for-each select="$sourceElement">
<xsl:element name="dc:creator">
<xsl:if test="./*[local-name()='contrib-id'][@contrib-id-type='orcid']">
<xsl:attribute name="nameIdentifierScheme">
<xsl:text>ORCID</xsl:text>
</xsl:attribute>
<xsl:attribute name="schemeURI">
<xsl:text>http://orcid.org/</xsl:text>
</xsl:attribute>
<xsl:attribute name="nameIdentifier">
<xsl:value-of select="substring-after(./*[local-name()='contrib-id'][@contrib-id-type='orcid'], 'http://orcid.org/')"/>
</xsl:attribute>
</xsl:if>
<xsl:value-of select="concat(normalize-space(./*[local-name()='name']/*[local-name()='surname']), ', ', normalize-space(./*[local-name()='name']/*[local-name()='given-names']))"/>
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template match="//*[local-name() = 'header']">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
<xsl:element name="dr:dateOfTransformation">
<xsl:value-of select="$transDate"/>
</xsl:element>
</xsl:copy>
</xsl:template>
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,373 @@
<!-- for adaptation , 2021-06-14 PROD -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
exclude-result-prefixes="xsl vocabulary dateCleaner"
version="2.0">
<xsl:param name="varOfficialName" />
<xsl:param name="varDsType" />
<xsl:param name="varDataSourceId" />
<xsl:output indent="yes" omit-xml-declaration="yes" />
<xsl:param name="varHostedById" select="&apos;opendoar____::908&apos;" />
<xsl:param name="varHostedByName" select="&apos;Europe PubMed Central&apos;" />
<xsl:param name="varFP7FundRefDOI" select="&apos;10.13039/501100004963&apos;" />
<xsl:param name="varH2020FundRefDOI" select="&apos;10.13039/501100007601&apos;" />
<xsl:param name="varFP7" select="&apos;corda_______::&apos;" />
<xsl:param name="varH2020" select="&apos;corda__h2020::&apos;" />
<xsl:param name="epmcUrlPrefix" select="&apos;http://europepmc.org/articles/&apos;" />
<xsl:param name="repoCode" select="substring-before(//*[local-name() = &apos;header&apos;]/*[local-name()=&apos;recordIdentifier&apos;], &apos;:&apos;)" />
<xsl:param name="index" select="0" />
<xsl:param name="transDate" select="current-dateTime()" />
<xsl:variable name="year" select="format-number( ( //*[local-name()=&apos;article-meta&apos;]//*[local-name()=&apos;pub-date&apos;][@pub-type=&apos;epub&apos;]/*[local-name()=&apos;year&apos;] | //*[local-name()=&apos;article-meta&apos;]//*[local-name()=&apos;pub-date&apos;][@date-type=&apos;pub&apos; and @publication-format=&apos;electronic&apos;]/*[local-name()=&apos;year&apos;]), &apos;0000&apos;)" />
<xsl:variable name="month" select="format-number( (//*[local-name()=&apos;article-meta&apos;]//*[local-name()=&apos;pub-date&apos;][@pub-type=&apos;epub&apos;]/*[local-name()=&apos;month&apos;] | //*[local-name()=&apos;article-meta&apos;]//*[local-name()=&apos;pub-date&apos;][@date-type=&apos;pub&apos; and @publication-format=&apos;electronic&apos;]/*[local-name()=&apos;month&apos;]), &apos;00&apos;)" />
<xsl:variable name="day" select="format-number( (//*[local-name()=&apos;article-meta&apos;]//*[local-name()=&apos;pub-date&apos;][@pub-type=&apos;epub&apos;]/*[local-name()=&apos;day&apos;] | //*[local-name()=&apos;article-meta&apos;]//*[local-name()=&apos;pub-date&apos;][@date-type=&apos;pub&apos; and @publication-format=&apos;electronic&apos;]/*[local-name()=&apos;day&apos;]), &apos;00&apos;)" />
<xsl:template name="terminate">
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<xsl:template match="/">
<record>
<xsl:apply-templates select="//*[local-name() = &apos;header&apos;]" />
<metadata>
<xsl:if test="not(//*[local-name() = &apos;article-meta&apos;]//*[local-name()=&apos;article-title&apos;][string-length(normalize-space(.))&gt; 0])">
<xsl:call-template name="terminate" />
</xsl:if>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name() = &apos;article-meta&apos;]//*[local-name()=&apos;article-title&apos;][string-length(normalize-space(.))&gt; 0]" />
<xsl:with-param name="targetElement" select="&apos;dc:title&apos;" />
</xsl:call-template>
<xsl:call-template name="authors">
<!--
<xsl:with-param name="sourceElement" select="//*[local-name() = 'contrib'][@contrib-type='author']"/>
-->
<xsl:with-param name="sourceElement" select="//*[local-name() = &apos;article-meta&apos;]/*[local-name() = &apos;contrib-group&apos;]/*[local-name() = &apos;contrib&apos;][@contrib-type=&apos;author&apos;][not(exists(child::*:collab))][./*[local-name()=&apos;name&apos;] or ./*[local-name()=&apos;name-alternatives&apos;]/*[local-name()=&apos;name&apos;]][string-length(.//*[local-name()=&apos;surname&apos;]) + string-length(.//*[local-name()=&apos;given-names&apos;]) &gt; 0]" />
</xsl:call-template> <!-- <xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:contributor"/>
<xsl:with-param name="targetElement" select="'dc:contributor'"/>
</xsl:call-template>
-->
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()=&apos;article-meta&apos;]/*[local-name()=&apos;abstract&apos;]" />
<xsl:with-param name="targetElement" select="&apos;dc:description&apos;" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()=&apos;article-categories&apos;]//*[local-name()=&apos;subject&apos;]" />
<xsl:with-param name="targetElement" select="&apos;dc:subject&apos;" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()=&apos;kwd-group&apos; and not(lower-case(@kwd-group-type)=(&apos;mesh&apos;, &apos;ocis&apos;))]//*[local-name()=&apos;kwd&apos;]" />
<xsl:with-param name="targetElement" select="&apos;dc:subject&apos;" />
</xsl:call-template>
<xsl:for-each select="//*[local-name()=&apos;kwd-group&apos; and lower-case(@kwd-group-type)=&apos;mesh&apos; and ./*[local-name()=&apos;kwd&apos;]]">
<xsl:for-each select="./*[local-name()=&apos;kwd&apos;]">
<dc:subject>
<xsl:attribute name="subjectScheme" select="&apos;mesh&apos;" />
<xsl:attribute name="schemeURI" select="&apos;http://www.nlm.nih.gov/mesh/&apos;" />
<xsl:attribute name="valueURI" select="&apos;&apos;" />
<xsl:value-of select="./concat(&apos;mesh:&apos;, replace(., &apos;mesh (.*)$&apos;, &apos;$1&apos;))" />
</dc:subject>
</xsl:for-each>
</xsl:for-each>
<xsl:for-each select="//*[local-name()=&apos;kwd-group&apos; and lower-case(@kwd-group-type)=&apos;ocis&apos; and ./*[local-name()=&apos;kwd&apos;]]">
<xsl:for-each select="./*[local-name()=&apos;kwd&apos;]">
<dc:subject>
<xsl:attribute name="subjectScheme" select="&apos;ocis&apos;" />
<xsl:attribute name="schemeURI" select="&apos;&apos;" />
<xsl:attribute name="valueURI" select="&apos;&apos;" />
<xsl:value-of select="./concat(&apos;ocis:&apos;, .)" />
</dc:subject>
</xsl:for-each>
</xsl:for-each>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()=&apos;publisher&apos;]/*[local-name()=&apos;publisher-name&apos;]" />
<xsl:with-param name="targetElement" select="&apos;dc:publisher&apos;" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()=&apos;journal-meta&apos;]//*[local-name()=&apos;journal-title&apos;]" />
<xsl:with-param name="targetElement" select="&apos;dc:source&apos;" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name() = &apos;article-meta&apos;]/(*[local-name() = &apos;article-version-alternatives&apos;]/*[local-name() = &apos;article-version&apos;], *[local-name() = &apos;article-version&apos;])/concat(&apos;article-version (&apos;, @article-version-type, &apos;) &apos;, .)" />
<xsl:with-param name="targetElement" select="&apos;dc:source&apos;" />
</xsl:call-template>
<xsl:element name="dc:language">
<xsl:text>eng</xsl:text>
</xsl:element>
<xsl:element name="dc:identifier">
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()=&apos;article-id&apos;][@pub-id-type=&apos;pmcid&apos;])" />
</xsl:element>
<xsl:element name="oaf:fulltext">
<xsl:value-of select="concat($epmcUrlPrefix, //*[local-name()=&apos;article-id&apos;][@pub-id-type=&apos;pmcid&apos;])" />
</xsl:element>
<xsl:element name="oaf:dateAccepted">
<xsl:choose>
<xsl:when test="//*[local-name()=&apos;article-meta&apos;]//*[local-name()=&apos;pub-date&apos;][@pub-type=&apos;epub&apos;] or //*[local-name()=&apos;article-meta&apos;]//*[local-name()=&apos;pub-date&apos;][@date-type=&apos;pub&apos; and @publication-format=&apos;electronic&apos;]">
<xsl:if test="string(number($month)) eq &apos;NaN&apos;">
<xsl:value-of select="concat($year, &apos;-&apos;, &apos;01&apos;, &apos;-&apos;, &apos;01&apos;)" />
</xsl:if>
<xsl:if test="string(number($month)) != &apos;NaN&apos;">
<xsl:value-of select="concat($year, &apos;-&apos;, $month, &apos;-&apos;, &apos;01&apos;)" />
</xsl:if>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="concat(//*[local-name()=&apos;article-meta&apos;]//*[local-name()=&apos;pub-date&apos;][@pub-type=&apos;ppub&apos;]/*[local-name()=&apos;year&apos;], &apos;-01-01&apos;)" />
</xsl:otherwise>
</xsl:choose>
</xsl:element>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()=&apos;permissions&apos;]/*[local-name()=&apos;copyright-statement&apos;])" />
<xsl:with-param name="targetElement" select="&apos;dc:rights&apos;" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="distinct-values(//*[local-name()=&apos;permissions&apos;]/*[local-name()=&apos;license&apos;])" />
<xsl:with-param name="targetElement" select="&apos;dc:rights&apos;" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//*[local-name()=&apos;fn-group&apos;]//*[local-name()=&apos;fn&apos;]" />
<xsl:with-param name="targetElement" select="&apos;dc:relation&apos;" />
</xsl:call-template>
<xsl:call-template name="identifiers">
<xsl:with-param name="sourceElement" select="//*[local-name()=&apos;article-id&apos;]" />
</xsl:call-template>
<xsl:for-each select="//*[local-name()=&apos;award-group&apos;][.//*[local-name()=&apos;institution-id&apos;][ends-with(., $varFP7FundRefDOI)]]">
<xsl:if test="./*[local-name()=&apos;award-id&apos;][matches(normalize-space(.), &apos;(^\d\d\d\d\d\d$)&apos;, &apos;i&apos;)]">
<oaf:projectid>
<xsl:value-of select="concat($varFP7, ./*[local-name()=&apos;award-id&apos;])" />
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<xsl:for-each select="//*[local-name()=&apos;award-group&apos;][.//*[local-name()=&apos;institution-id&apos;][ends-with(., $varH2020FundRefDOI)]]">
<xsl:if test="./*[local-name()=&apos;award-id&apos;][matches(normalize-space(.), &apos;(^\d\d\d\d\d\d$)&apos;, &apos;i&apos;)]">
<oaf:projectid>
<xsl:value-of select="concat($varH2020, ./*[local-name()=&apos;award-id&apos;])" />
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<xsl:element name="oaf:accessrights">
<xsl:text>OPEN</xsl:text>
</xsl:element>
<xsl:element name="dr:CobjCategory">
<xsl:attribute name="type" select="&apos;publication&apos;" />
<xsl:text>0001</xsl:text>
</xsl:element>
<dc:type>
<xsl:value-of select="//*[local-name() = &apos;article&apos;]/@article-type" />
</dc:type>
<xsl:variable name="varRefereedConvt" select="for $i in (//*[local-name() = 'resource']/*[local-name() = ('resourceType', 'version')]/(., @uri))
return vocabulary:clean( normalize-space($i), 'dnet:review_levels')"/>
<!-- <xsl:variable name="varRefereedConvt" select="for $i in distinct-values((//*[local-name() = &apos;article&apos;]/@article-type, //oai:setSpec))
return TransformationFunction:convertString($tf, normalize-space($i), &apos;ReviewLevels&apos;)" />
-->
<xsl:choose>
<xsl:when test="count($varRefereedConvt[. = &apos;0001&apos;]) &gt; 0">
<oaf:refereed>
<xsl:value-of select="&apos;0001&apos;" />
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = &apos;article-meta&apos;]/*[local-name() = &apos;article-id&apos;][@pub-id-type=&apos;doi&apos;][matches(., &apos;^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$&apos;)]">
<oaf:refereed>
<xsl:value-of select="&apos;0001&apos;" />
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = &apos;article-meta&apos;]/*[local-name() = (&apos;abstract&apos;, &apos;trans-abstract&apos;)][matches(lower-case(.), &apos;^\s*(.p.\s*)?refereed\s*article.*&apos;)]">
<oaf:refereed>
<xsl:value-of select="&apos;0001&apos;" />
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = &apos;article&apos;]/*[local-name() = (&apos;back&apos;, &apos;front&apos;)]/*[local-name() = (&apos;fn-group&apos;, &apos;notes&apos;)][
matches(lower-case(.), &apos;.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*&apos;) or
matches(lower-case(.), &apos;.*peer[\.\-_/\s\(\)]*review\s*information.*&apos;) or
matches(lower-case(.), &apos;.*the\s*peer[\.\-_/\s\(\)]*review\s*history\s*for\s*this\s*article\s*is\s*available\s*at .*&apos;) or
matches(lower-case(.), &apos;.*provenance\s*and\s*peer[\.\-_/\s\(\)]*review.*&apos;) or
matches(lower-case(.), &apos;.*externally\s*peer[\.\-_/\s\(\)]*reviewed.*&apos;) or
matches(lower-case(.), &apos;.*peer[\.\-_/\s\(\)]*reviewed\s*by.*&apos;) or
matches(lower-case(.), &apos;.*refereed\s*anonymously.*&apos;) or
matches(lower-case(.), &apos;.*peer\s*reviewer\s*reports\s*are\s*available.*&apos;) or
matches(lower-case(.), &apos;.*\[.*peer[\s\-\._]*review\s*:.*\].*&apos;) or
matches(lower-case(.), &apos;.*\[.*referees\s*:.*\].*&apos;) or
matches(lower-case(.), &apos;^\s*plagiarism[\s\-\._]check.*&apos;) or
matches(lower-case(.), &apos;^\s*peer[\s\-\._]*review.*&apos;) or
matches(lower-case(.), &apos;^\s*(open\s*peer[\s\-\._]*|p-)reviewer.*&apos;) or
matches(lower-case(.), &apos;^\s*(open\s*peer[\s\-\._]*|p-)review\s*reports?.*&apos;)]">
<oaf:refereed>
<xsl:value-of select="&apos;0001&apos;" />
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = (&apos;article-meta&apos;, &apos;app&apos;, &apos;app-group&apos;)]/*[local-name() = &apos;supplementary-material&apos;]/*[local-name() = &apos;media&apos;][
matches(lower-case(.), &apos;.*peer\s*review\s*file.*&apos;)]">
<oaf:refereed>
<xsl:value-of select="&apos;0001&apos;" />
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = &apos;article-meta&apos;]/*[local-name() = &apos;contrib-group&apos;]
[./@role/lower-case(.) = (&apos;reviewer&apos;, &apos;solicited external reviewer&apos;) or
./*[local-name() = &apos;contrib&apos;][./@role/lower-case(.) = (&apos;reviewer&apos;, &apos;solicited external reviewer&apos;) or ./*[local-name() = &apos;role&apos; and lower-case(.) = (&apos;reviewer&apos;, &apos;solicited external reviewer&apos;)] or ./@contrib-type/lower-case(.) = &apos;reviewer&apos;]]">
<oaf:refereed>
<xsl:value-of select="&apos;0001&apos;" />
</oaf:refereed>
</xsl:when>
<xsl:when test="count($varRefereedConvt[. = &apos;0002&apos;]) &gt; 0">
<oaf:refereed>
<xsl:value-of select="&apos;0002&apos;" />
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = (&apos;related-article&apos;)][./@related-article-type = (&apos;peer-reviewed-article&apos;, &apos;reviewed-article&apos;)]">
<oaf:refereed>
<xsl:value-of select="&apos;0002&apos;" />
</oaf:refereed>
</xsl:when>
<xsl:when test="//*[local-name() = &apos;article-meta&apos;][./*[local-name() = &apos;article-version-alternatives&apos;]/*[local-name() = &apos;article-version&apos; and . = &apos;preprint&apos;] or ./*[local-name() = &apos;article-version&apos; and . = &apos;preprint&apos;]]">
<oaf:refereed>
<xsl:value-of select="&apos;0002&apos;" />
</oaf:refereed>
</xsl:when>
</xsl:choose>
<xsl:call-template name="journal">
<xsl:with-param name="journalTitle" select="//*[local-name()=&apos;journal-meta&apos;]//*[local-name()=&apos;journal-title&apos;]" />
<xsl:with-param name="issn" select="//*[local-name()=&apos;journal-meta&apos;]/*[local-name()=&apos;issn&apos;][@pub-type=&apos;ppub&apos;]" />
<xsl:with-param name="eissn" select="//*[local-name()=&apos;journal-meta&apos;]/*[local-name()=&apos;issn&apos;][@pub-type=&apos;epub&apos;]" />
<xsl:with-param name="vol" select="//*[local-name()=&apos;article-meta&apos;]/*[local-name()=&apos;volume&apos;]" />
<xsl:with-param name="issue" select="//*[local-name()=&apos;article-meta&apos;]/*[local-name()=&apos;issue&apos;]" />
<xsl:with-param name="sp" select="//*[local-name()=&apos;article-meta&apos;]/*[local-name()=&apos;fpage&apos;]" />
<xsl:with-param name="ep" select="//*[local-name()=&apos;article-meta&apos;]/*[local-name()=&apos;lpage&apos;]" />
</xsl:call-template>
<oaf:hostedBy>
<xsl:attribute name="name">
<xsl:value-of select="$varHostedByName" />
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varHostedById" />
</xsl:attribute>
</oaf:hostedBy>
<oaf:collectedFrom>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName" />
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId" />
</xsl:attribute>
</oaf:collectedFrom>
<xsl:for-each select="//*[local-name() = &apos;article&apos;]/*[local-name() = (&apos;back&apos;, &apos;front&apos;)]/*[local-name() = &apos;fn-group&apos;]/*[local-name() = &apos;fn&apos;][matches(lower-case(.), &apos;country(/territory)? of origin:?\s*[A-Za-z\-]+&apos;)]">
<oaf:country>
<!--
<xsl:value-of select="TransformationFunction:convertString($tf, replace(lower-case(.), '^(.|\s)*country(/territory)? of origin:?\s+([A-Za-z\-,\(\)]+(\s+[A-Za-z\-,\(\)]+)*)(.|\s)*$', '$3'), 'Countries')"/>
-->
<!-- ACz, 2021-06-14
<xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(substring(substring-after(lower-case(.), &apos;of origin&apos;), 2)), &apos;Countries&apos;)" />
-->
<xsl:value-of select="vocabulary:clean( normalize-space(substring(substring-after(lower-case(.), &apos;of origin&apos;), 2)), 'dnet:countries')"/>
</oaf:country>
</xsl:for-each>
</metadata>
<xsl:copy-of select="//*[local-name() = &apos;about&apos;]" />
</record>
</xsl:template>
<xsl:template name="allElements">
<xsl:param name="sourceElement" />
<xsl:param name="targetElement" />
<xsl:for-each select="$sourceElement">
<xsl:element name="{$targetElement}">
<xsl:value-of select="normalize-space(.)" />
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template name="journal">
<xsl:param name="journalTitle" />
<xsl:param name="issn" />
<xsl:param name="eissn" />
<xsl:param name="vol" />
<xsl:param name="issue" />
<xsl:param name="sp" />
<xsl:param name="ep" />
<xsl:element name="oaf:journal">
<xsl:attribute name="issn">
<xsl:value-of select="normalize-space($issn)" />
</xsl:attribute>
<xsl:attribute name="eissn">
<xsl:value-of select="normalize-space($eissn)" />
</xsl:attribute>
<xsl:attribute name="vol">
<xsl:value-of select="normalize-space($vol)" />
</xsl:attribute>
<xsl:attribute name="iss">
<xsl:value-of select="normalize-space($issue)" />
</xsl:attribute>
<xsl:attribute name="sp">
<xsl:value-of select="normalize-space($sp)" />
</xsl:attribute>
<xsl:attribute name="ep">
<xsl:value-of select="normalize-space($ep)" />
</xsl:attribute>
<xsl:value-of select="normalize-space($journalTitle)" />
</xsl:element>
</xsl:template>
<xsl:template name="identifiers">
<xsl:param name="sourceElement" />
<xsl:element name="oaf:identifier">
<xsl:attribute name="identifierType">
<xsl:text>doi</xsl:text>
</xsl:attribute>
<xsl:value-of select="$sourceElement[@pub-id-type=&apos;doi&apos;]" />
</xsl:element>
<xsl:element name="oaf:identifier">
<xsl:attribute name="identifierType">
<xsl:text>pmc</xsl:text>
</xsl:attribute>
<xsl:value-of select="$sourceElement[@pub-id-type=&apos;pmcid&apos;]" />
</xsl:element>
<xsl:element name="oaf:identifier">
<xsl:attribute name="identifierType">
<xsl:text>pmid</xsl:text>
</xsl:attribute>
<xsl:value-of select="$sourceElement[@pub-id-type=&apos;pmid&apos;]" />
</xsl:element>
</xsl:template>
<xsl:template name="authors">
<xsl:param name="sourceElement" />
<xsl:for-each select="$sourceElement">
<xsl:element name="dc:creator">
<xsl:if test="./*[local-name()=&apos;contrib-id&apos;][@contrib-id-type=&apos;orcid&apos;]">
<xsl:attribute name="nameIdentifierScheme">
<xsl:text>ORCID</xsl:text>
</xsl:attribute>
<xsl:attribute name="schemeURI">
<xsl:text>http://orcid.org/</xsl:text>
</xsl:attribute>
<xsl:attribute name="nameIdentifier">
<xsl:value-of select="substring-after(./*[local-name()=&apos;contrib-id&apos;][@contrib-id-type=&apos;orcid&apos;], &apos;http://orcid.org/&apos;)" />
</xsl:attribute>
</xsl:if> <!--
<xsl:value-of select="concat(normalize-space(./*[local-name()='name']/*[local-name()='surname']), ', ', normalize-space(./*[local-name()='name']/*[local-name()='given-names']))"/>
-->
<xsl:value-of select="concat(normalize-space(./(*[local-name()=&apos;name&apos;], *[local-name()=&apos;name-alternatives&apos;]/*[local-name()=&apos;name&apos;])/*[local-name()=&apos;surname&apos;]), &apos;, &apos;, normalize-space(./(*[local-name()=&apos;name&apos;], *[local-name()=&apos;name-alternatives&apos;]/*[local-name()=&apos;name&apos;])/*[local-name()=&apos;given-names&apos;]))" />
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template match="//*[local-name() = &apos;header&apos;]">
<xsl:copy>
<xsl:apply-templates select="node()|@*" />
<xsl:element name="dr:dateOfTransformation">
<xsl:value-of select="$transDate" />
</xsl:element>
</xsl:copy>
</xsl:template>
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*" />
</xsl:copy>
</xsl:template>
</xsl:stylesheet>

View File

@ -35,7 +35,7 @@
<configuration> <configuration>
<args> <args>
<arg>-Xmax-classfile-name</arg> <arg>-Xmax-classfile-name</arg>
<arg>140</arg> <arg>200</arg>
</args> </args>
<scalaVersion>${scala.version}</scalaVersion> <scalaVersion>${scala.version}</scalaVersion>
</configuration> </configuration>

View File

@ -3,8 +3,12 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -98,14 +102,9 @@ public class MergeClaimsApplication {
raw raw
.joinWith(claim, raw.col("_1").equalTo(claim.col("_1")), "full_outer") .joinWith(claim, raw.col("_1").equalTo(claim.col("_1")), "full_outer")
.map( .map(
(MapFunction<Tuple2<Tuple2<String, T>, Tuple2<String, T>>, T>) value -> { (MapFunction<Tuple2<Tuple2<String, T>, Tuple2<String, T>>, T>) value -> processClaims(
Optional<Tuple2<String, T>> opRaw = Optional.ofNullable(value._1()); Optional.ofNullable(value._1()),
Optional<Tuple2<String, T>> opClaim = Optional.ofNullable(value._2()); Optional.ofNullable(value._2())),
return opRaw.isPresent()
? opRaw.get()._2()
: opClaim.isPresent() ? opClaim.get()._2() : null;
},
Encoders.bean(clazz)) Encoders.bean(clazz))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map( .map(
@ -117,6 +116,78 @@ public class MergeClaimsApplication {
.text(outPath); .text(outPath);
} }
private static <T extends Oaf> T processClaims(Optional<Tuple2<String, T>> opRaw,
Optional<Tuple2<String, T>> opClaim) {
// when both are present
if (opClaim.isPresent() && opRaw.isPresent()) {
T oafClaim = opClaim.get()._2();
if (oafClaim instanceof Result) {
T oafRaw = opRaw.get()._2();
// merge the context lists from both oaf objects ...
final List<Context> context = mergeContexts((Result) oafClaim, (Result) oafRaw);
// ... and set it on the result from the aggregator
((Result) oafRaw).setContext(context);
return oafRaw;
}
}
// otherwise prefer the result from the aggregator
return opRaw.isPresent()
? opRaw.get()._2()
: opClaim.map(Tuple2::_2).orElse(null);
}
private static List<Context> mergeContexts(Result oafClaim, Result oafRaw) {
return new ArrayList<>(
Stream
.concat(
Optional
.ofNullable(oafClaim.getContext())
.map(List::stream)
.orElse(Stream.empty()),
Optional
.ofNullable(oafRaw.getContext())
.map(List::stream)
.orElse(Stream.empty()))
.collect(
Collectors
.toMap(
Context::getId,
c -> c,
(c1, c2) -> {
Context c = new Context();
c.setId(c1.getId());
c
.setDataInfo(
new ArrayList<>(
Stream
.concat(
Optional
.ofNullable(c1.getDataInfo())
.map(List::stream)
.orElse(Stream.empty()),
Optional
.ofNullable(c2.getDataInfo())
.map(List::stream)
.orElse(Stream.empty()))
.collect(
Collectors
.toMap(
d -> Optional
.ofNullable(d.getProvenanceaction())
.map(Qualifier::getClassid)
.orElse(""),
d -> d,
(d1, d2) -> d1))
.values()));
return c;
}))
.values());
}
private static <T extends Oaf> Dataset<T> readFromPath( private static <T extends Oaf> Dataset<T> readFromPath(
SparkSession spark, String path, Class<T> clazz) { SparkSession spark, String path, Class<T> clazz) {
return spark return spark

View File

@ -480,38 +480,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false); final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false); final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
final Relation r1 = new Relation(); Relation r1 = prepareRelation(sourceId, targetId, validationDate);
final Relation r2 = new Relation(); Relation r2 = prepareRelation(targetId, sourceId, validationDate);
if (StringUtils.isNotBlank(validationDate)) {
r1.setValidated(true);
r1.setValidationDate(validationDate);
r2.setValidated(true);
r2.setValidationDate(validationDate);
}
r1.setCollectedfrom(COLLECTED_FROM_CLAIM);
r1.setSource(sourceId);
r1.setTarget(targetId);
r1.setDataInfo(DATA_INFO_CLAIM);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
r2.setCollectedfrom(COLLECTED_FROM_CLAIM);
r2.setSource(targetId);
r2.setTarget(sourceId);
r2.setDataInfo(DATA_INFO_CLAIM);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
final String semantics = rs.getString("semantics"); final String semantics = rs.getString("semantics");
switch (semantics) { switch (semantics) {
case "resultResult_relationship_isRelatedTo": case "resultResult_relationship_isRelatedTo":
r1.setRelType(RESULT_RESULT); r1 = setRelationSemantic(r1, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
r1.setSubRelType(RELATIONSHIP); r2 = setRelationSemantic(r2, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
r1.setRelClass(IS_RELATED_TO);
r2.setRelType(RESULT_RESULT);
r2.setSubRelType(RELATIONSHIP);
r2.setRelClass(IS_RELATED_TO);
break; break;
case "resultProject_outcome_produces": case "resultProject_outcome_produces":
if (!"project".equals(sourceType)) { if (!"project".equals(sourceType)) {
@ -521,13 +498,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
"invalid claim, sourceId: %s, targetId: %s, semantics: %s", "invalid claim, sourceId: %s, targetId: %s, semantics: %s",
sourceId, targetId, semantics)); sourceId, targetId, semantics));
} }
r1.setRelType(RESULT_PROJECT); r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
r1.setSubRelType(OUTCOME); r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
r1.setRelClass(PRODUCES); break;
case "resultResult_publicationDataset_isRelatedTo":
r2.setRelType(RESULT_PROJECT); r1 = setRelationSemantic(r1, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
r2.setSubRelType(OUTCOME); r2 = setRelationSemantic(r2, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
r2.setRelClass(IS_PRODUCED_BY);
break; break;
default: default:
throw new IllegalArgumentException("claim semantics not managed: " + semantics); throw new IllegalArgumentException("claim semantics not managed: " + semantics);
@ -540,6 +516,27 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
} }
} }
private Relation prepareRelation(String sourceId, String targetId, String validationDate) {
Relation r = new Relation();
if (StringUtils.isNotBlank(validationDate)) {
r.setValidated(true);
r.setValidationDate(validationDate);
}
r.setCollectedfrom(COLLECTED_FROM_CLAIM);
r.setSource(sourceId);
r.setTarget(targetId);
r.setDataInfo(DATA_INFO_CLAIM);
r.setLastupdatetimestamp(lastUpdateTimestamp);
return r;
}
private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) {
r.setRelType(relType);
r.setSubRelType(subRelType);
r.setRelClass(relClass);
return r;
}
private List<Context> prepareContext(final String id, final DataInfo dataInfo) { private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
final Context context = new Context(); final Context context = new Context();
context.setId(id); context.setId(id);