<!--
BrBETA_dnet-hadoop/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/xslt_dc2oaf_narcis_hadoop.xsl

352 lines
22 KiB
XML
-->

<!-- original: xslt_dc2oaf_narcis from PROD 2021-11-18 -->
<xsl:stylesheet
version="2.0"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
exclude-result-prefixes="xsl vocabulary dateCleaner">
<!-- Collecting datasource: written to oaf:collectedFrom as @id and @name. No default is declared. -->
<xsl:param name="varDataSourceId"/>
<xsl:param name="varOfficialName"/>

<!-- Prefixes concatenated in front of the 6-digit grant code when oaf:projectid is built. -->
<xsl:param name="varH2020" select="'corda__h2020::'"/>
<xsl:param name="varFP7" select="'corda_______::'"/>

<!-- Text before the first ':' of the header's recordIdentifier; used as lookup key into $vCodes. -->
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>

<!-- Timestamp written to dr:dateOfTransformation; defaults to the current date and time. -->
<xsl:param name="transDate" select="current-dateTime()"/>

<!-- Not referenced anywhere in the visible part of this stylesheet. -->
<xsl:param name="index" select="0"/>
<!--
  Lookup table of the repositories aggregated via NARCIS, indexed by the key 'kCodeByName'.
    @key   repository code, compared with $repoCode (text before the first ':' of the recordIdentifier)
    @value 12-character namespace prefix (only read by a commented-out template in this file)
    @id    datasource id, written to oaf:hostedBy/@id
    @name  datasource name, written to oaf:hostedBy/@name
  Every @id is expected to follow the pattern '<12-character prefix>::<local id>'.
-->
<xsl:variable name="vCodes">
  <codes>
    <code key="uva" value="od_______323" id="opendoar____::323" name="Universiteit van Amsterdam Digital Academic Repository"/>
    <code key="uvapub" value="od_______323" id="opendoar____::323" name="Universiteit van Amsterdam Digital Academic Repository"/>
    <code key="vumc" value="od_______323" id="opendoar____::323" name="Universiteit van Amsterdam Digital Academic Repository"/>
    <code key="cwi" value="od______2358" id="opendoar____::2358" name="Repository CWI Amsterdam"/> <!-- CWI -->
    <code key="eur" value="od______1113" id="opendoar____::1113" name="Erasmus University Institutional Repository"/>
    <code key="wur" value="od_______370" id="opendoar____::370" name="Wageningen Yield"/>
    <code key="uu" value="od_______101" id="opendoar____::101" name="Utrecht University Repository"/>
    <code key="ru" value="od______1236" id="opendoar____::1236" name="Radboud Repository"/> <!-- Radboud -->
    <code key="run" value="od______1236" id="opendoar____::1236" name="Radboud Repository"/> <!-- Radboud -->
    <code key="uvt" value="od_______550" id="opendoar____::550" name="Tilburg University Repository"/> <!-- Tilburg -->
    <code key="aup" value="od________19" id="opendoar____::19" name="Amsterdam University Press Publications"/> <!-- amsterdam univ. press -->
    <code key="rug" value="od_______189" id="opendoar____::189" name="University of Groningen Digital Archive"/> <!-- groningen -->
    <code key="dans" value="r384e1237760" id="re3data_____::r3d100010214" name="EASY"/> <!--easy -->
    <code key="differ" value="differ______" id="openaire____::75ee19e2-ff9e-47f7-bed6-2e3ee23e2b49" name="Dutch Institute for Fundamental Energy Research"/>
    <code key="kit" value="od______1423" id="opendoar____::1423" name="Search4Dev"/> <!-- search4dev -->
    <code key="ul" value="od_______202" id="opendoar____::202" name="Leiden University Repository"/> <!-- leiden -->
    <code key="um" value="od________83" id="opendoar____::83" name="UM Publications"/>
    <code key="knaw" value="od______1476" id="opendoar____::1476" name="KNAW Repository"/>
    <code key="vu" value="od_______369" id="opendoar____::369" name="DSpace at VU"/>
    <code key="ut" value="od_______354" id="opendoar____::354" name="Universiteit Twente Repository"/>
    <code key="hbo" value="hbo_________" id="openaire____::79c8217f-00ee-4902-9743-9e11b4970c60" name="HBO Kennisbank"/>
    <code key="kim" value="kim_________" id="openaire____::b1b15b72-bf0b-4f91-9f95-dab2e43d3eaa" name="Publicaties KiM"/>
    <code key="nivel" value="nivel_____nl" id="driver______::daf0542d-1ef5-4f9d-80f1-62849b92aefa" name="NIVEL publications"/>
    <code key="ntrl" value="od_______913" id="opendoar____::913" name="Naturalis Publications"/>
    <code key="nyenrode" value="nyenrode____" id="openaire____::e57352f3-516b-42cb-b666-2480233c6513" name="Publications of the University Nyenrode"/>
    <!-- trailing blank removed from @name so that it does not end up in oaf:hostedBy/@name -->
    <code key="ou" value="od_______233" id="opendoar____::233" name="DSpace at Open Universiteit Nederland"/>
    <!-- the namespace prefix was duplicated in @id ('openaire____::openaire____::...') -->
    <code key="ptu" value="ptu_________" id="openaire____::f834f1fe-8198-4929-ac0b-b1c1bf166f38" name="Protestantse Theologische Universiteit"/>
    <code key="rivm" value="od_______881" id="opendoar____::881" name="Web-based Archive of RIVM Publications"/>
    <code key="scp" value="scp_________" id="openaire____::088a0087-4bc6-4c38-a052-b446c3b225a7" name="Sociaal en Cultureel Planbureau"/>
    <code key="swov" value="swov________" id="openaire____::06d89df2-b613-4989-9dc3-f60f2fc593f6" name="Stichting Wetenschappelijk Onderzoek Verkeersveiligheid (SWOV) Library Repository"/>
    <code key="tno" value="tno_________" id="openaire____::58fd0ad2-c476-11e5-80b3-0021e9e777ac" name="TNO Repository - hosted by TU Delft Library"/>
    <code key="tue" value="od_______567" id="opendoar____::567" name="Repository TU/e"/>
    <!-- a stray trailing '::' was removed from @id -->
    <code key="tuk" value="tuk_________" id="openaire____::df55d991-1ebb-459c-aed6-559bcbb1d277" name="Theological University Kampen"/>
    <code key="uvh" value="uvh_______nl" id="driver______::a422c38b-73de-44bf-a340-a4fd5f0817ea" name="Universiteit voor Humanistiek"/>
    <code key="tua" value="tua_________" id="openaire____::cd073e1e-2fe9-4ea7-aea5-dc6855c347f7" name="Theological University Apeldoorn"/>
    <code key="tud" value="od_______571" id="opendoar____::571" name="TU Delft Repository"/>
    <code key="wodc" value="wodc______nl" id="driver______::03c60250-9d65-44fe-85d3-23503b3303af" name="WODC Repository Ministerie van Veiligheid en Justitie"/>
    <!-- NOTE(review): this @id has no namespace prefix, unlike every other entry; the correct prefix
         cannot be derived from this file, so the value is left unchanged. Confirm the datasource id. -->
    <code key="unesco" value="unesco___ihe" id="2877c7c4-b57a-4f62-9c16-d7faa5b0b98b" name="UNESCO-IHE Institute for Water Education"/>
  </codes>
</xsl:variable>
<!-- not considered
hbo added
tno added
differ added
nyenrode added
beeldengeluis todo, not yet found in metadata
philips todo, not yet found in metadata
scp added
swov added
tuk added
tua added
ptu added
ut_restricted ? merge with ut?
nda
neyenrode
-->
<!-- Index over the 'code' elements by the string value of their @key attribute;
     queried as key('kCodeByName', $repoCode, $vCodes). -->
<xsl:key
    name="kCodeByName"
    use="string(@key)"
    match="code"/>
<!-- terminate: aborts the whole transformation (xsl:message with terminate="yes").
     The message text, including its leading and trailing line break, is kept verbatim. -->
<xsl:template name="terminate">
  <xsl:message terminate="yes">
    <xsl:text>&#10;record is not compliant, transformation is interrupted.&#10;</xsl:text>
  </xsl:message>
</xsl:template>
<!--
  Root template: builds the output <record>, consisting of
    1. the transformed header (handled by the template matching 'header' elements),
    2. a <metadata> section with the Dublin Core fields and the derived oaf:/dr: fields,
    3. an unchanged copy of every 'about' element.
  The transformation is aborted (template 'terminate') when the record carries no
  dc:identifier starting with 'http'.
  vocabulary:clean and dateCleaner:dateISO are extension functions bound to the namespaces
  declared on xsl:stylesheet; their exact behaviour is not visible in this file.
-->
<xsl:template match="/">
  <record>
    <xsl:apply-templates select="//*[local-name() = 'header']" />
    <metadata>
      <!-- plain Dublin Core fields: one output element per source item, whitespace-normalised -->
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:title"/>
        <xsl:with-param name="targetElement" select="'dc:title'"/>
      </xsl:call-template>
      <!-- creators: when the value contains a '|', only the text before the last '|' is kept -->
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:creator/replace(., '^(.*)\|.*$', '$1')"/>
        <xsl:with-param name="targetElement" select="'dc:creator'"/>
      </xsl:call-template>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:contributor"/>
        <xsl:with-param name="targetElement" select="'dc:contributor'"/>
      </xsl:call-template>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:description"/>
        <xsl:with-param name="targetElement" select="'dc:description'"/>
      </xsl:call-template>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:subject"/>
        <xsl:with-param name="targetElement" select="'dc:subject'"/>
      </xsl:call-template>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:publisher"/>
        <xsl:with-param name="targetElement" select="'dc:publisher'"/>
      </xsl:call-template>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:format"/>
        <xsl:with-param name="targetElement" select="'dc:format'"/>
      </xsl:call-template>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:type"/>
        <xsl:with-param name="targetElement" select="'dc:type'"/>
      </xsl:call-template>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:source"/>
        <xsl:with-param name="targetElement" select="'dc:source'"/>
      </xsl:call-template>
      <!-- a single dc:language element is always written, holding the result of vocabulary:clean -->
      <dc:language>
        <xsl:value-of select="vocabulary:clean( //dc:language, 'dnet:languages')"/>
      </dc:language>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:rights"/>
        <xsl:with-param name="targetElement" select="'dc:rights'"/>
      </xsl:call-template>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:relation"/>
        <xsl:with-param name="targetElement" select="'dc:relation'"/>
      </xsl:call-template>
      <!-- compliance check: at least one dc:identifier must start with 'http' -->
      <xsl:if test="not(//dc:identifier[starts-with(., 'http')])">
        <xsl:call-template name="terminate"/>
      </xsl:if>
      <xsl:call-template name="allElements">
        <xsl:with-param name="sourceElement" select="//dc:identifier[starts-with(., 'http')]"/>
        <xsl:with-param name="targetElement" select="'dc:identifier'"/>
      </xsl:call-template>
      <!-- project links: the 6-digit code of FP7 / H2020 grant agreement relations
           (matched case-insensitively), prefixed with $varFP7 / $varH2020 -->
      <xsl:for-each select="//dc:relation">
        <xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
          <oaf:projectid>
            <xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))"/>
          </oaf:projectid>
        </xsl:if>
        <xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
          <oaf:projectid>
            <xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))"/>
          </oaf:projectid>
        </xsl:if>
      </xsl:for-each>
      <!--
      <xsl:if test="not(//didl:Component/didl:Resource[@mimeType='application/pdf'])">
      <xsl:call-template name="terminate"/>
      </xsl:if>
      -->
      <oaf:accessrights>
        <xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')"/>
      </oaf:accessrights>
      <!--
      <dr:CobjCategory>
      <xsl:value-of select="TransformationFunction:convertString($tf, //dc:type, 'TextTypologies')" />
      </dr:CobjCategory>
      -->
      <!-- resource type: dc:type cleaned against 'dnet:publication_resource'; that result is
           cleaned again against 'dnet:result_typologies' and written to the @type attribute -->
      <dr:CobjCategory>
        <xsl:variable name="varCobjCategory" select="vocabulary:clean( //dc:type, 'dnet:publication_resource')" />
        <xsl:variable name="varSuperType" select="vocabulary:clean( $varCobjCategory, 'dnet:result_typologies')"/>
        <xsl:attribute name="type" select="$varSuperType"/>
        <xsl:value-of select="$varCobjCategory" />
      </dr:CobjCategory>
      <!-- review status -->
      <!-- Each variable below yields a sequence of '0001' / '0002' codes collected from one kind of
           field (presumably peer-reviewed / not peer-reviewed; confirm against 'dnet:review_levels').
           The regular expressions are reproduced unchanged. -->
      <xsl:variable name="varRefereedConvt" select="for $i in (//dc:type, //dc:description, //oai:setSpec)
                                                    return vocabulary:clean( normalize-space($i), 'dnet:review_levels')"/>
      <xsl:variable name="varRefereedIdntf" select="(//*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)%\d#])pre[\.\-_/\s\(\)%\d#]?prints?([\.\-_/\s\(\)%\d#].*)?$')][count(//dc:identifier) = 1]/'0002', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)%\d#])refereed([\.\-_/\s\(\)\d%\d#].*)?$')]/'0001', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001')"/>
      <xsl:variable name="varRefereedSourc" select="//*[string(node-name(.)) = ('dc:source', 'dc:publisher') and matches(lower-case(.), '^(.*\s)?pre[\s\-_]*prints?([\s\.,].*)?$')]/'0002'"/>
      <xsl:variable name="varRefereedDescr" select="(//dc:description[matches(lower-case(.), '.*(this\s*book|this\s*volume|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001', //dc:description[matches(., '^version\s*(préliminaire.*|preliminary.*|0$)')]/'0002')"/>
      <xsl:variable name="varRefereedTitle" select="(//dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001',
                                                    //dc:title[matches(lower-case(.), '.*\(\s*pre[\s\-\._]*prints?\s*\)\s*$')]/'0002')"/>
      <xsl:variable name="varRefereedSubjt" select="(//dc:subject[matches(lower-case(.), '^\s*refereed\s*$')][//oaf:datasourceprefix = 'narcis______']/'0001',
                                                    //dc:subject[matches(lower-case(.), '^\s*no[nt].{0,3}refereed\s*$')][//oaf:datasourceprefix = 'narcis______']/'0002')"/>
      <xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedIdntf, $varRefereedSourc, $varRefereedDescr, $varRefereedTitle, $varRefereedSubjt)"/>
      <!-- '0001' takes precedence over '0002'; without either code no oaf:refereed is written -->
      <xsl:choose>
        <xsl:when test="count($varRefereed[. = '0001']) > 0">
          <oaf:refereed>
            <xsl:value-of select="'0001'"/>
          </oaf:refereed>
        </xsl:when>
        <xsl:when test="count($varRefereed[. = '0002']) > 0">
          <oaf:refereed>
            <xsl:value-of select="'0002'"/>
          </oaf:refereed>
        </xsl:when>
      </xsl:choose>
      <!-- (//dc:date)[1] is the first dc:date of the document; the former //dc:date[1] selected
           the first dc:date of every parent element and could therefore yield several nodes -->
      <oaf:dateAccepted>
        <xsl:value-of select="dateCleaner:dateISO( (//dc:date)[1] )"/>
      </oaf:dateAccepted>
      <!-- full text links are only exposed for records declared open access. One oaf:fulltext is
           written per http relation: xsl:value-of on the whole node sequence would join several
           URLs with blanks into a single, unusable value. -->
      <xsl:if test="//dc:rights[.='info:eu-repo/semantics/openAccess']">
        <xsl:for-each select="//dc:relation[starts-with(., 'http')]">
          <oaf:fulltext>
            <xsl:value-of select="."/>
          </oaf:fulltext>
        </xsl:for-each>
      </xsl:if>
      <!-- hosting repository: entry of $vCodes selected by $repoCode, looked up once.
           When $repoCode matches no entry, both attributes are written empty. -->
      <xsl:variable name="varHostingRepo" select="key('kCodeByName', $repoCode, $vCodes)"/>
      <oaf:hostedBy>
        <xsl:attribute name="name">
          <xsl:value-of select="$varHostingRepo/@name"/>
        </xsl:attribute>
        <xsl:attribute name="id">
          <xsl:value-of select="$varHostingRepo/@id"/>
        </xsl:attribute>
      </oaf:hostedBy>
      <oaf:collectedFrom>
        <xsl:attribute name="name">
          <xsl:value-of select="$varOfficialName"/>
        </xsl:attribute>
        <xsl:attribute name="id">
          <xsl:value-of select="$varDataSourceId"/>
        </xsl:attribute>
      </oaf:collectedFrom>
      <!-- ID recognition incomplete -->
      <!-- file extensions that disqualify an identifier from being treated as a landing page -->
      <xsl:variable name="varKnownFileEndings" select="('.bmp', '.doc', '.docx', '.epub', '.flv', '.jpeg', '.jpg', '.m4v', '.mp4', '.mpg', '.odp', '.pdf', '.png', '.ppt', '.tiv', '.txt', '.xls', '.xlsx', '.zip')" />
      <!-- regular expression for DOIs reduced here - letters like less-than and quotation marks don't work in matches, use identiferExtract when enabled -->
      <!-- DOIs: bare '10.' identifiers, doi.org / dx.doi.org URLs (text after 'doi.org/') and
           'doi:' prefixed values (lower-cased), de-duplicated -->
      <xsl:variable name="varIdDoi" select="distinct-values((//dc:identifier[starts-with(., '10.')][matches(., '(10[.][0-9]{4,}[^\s/>]*/[^\s>]+)')], //dc:identifier[starts-with(., 'http') and (contains(., '://dx.doi.org/10.') or contains(., '://doi.org/10.'))]/substring-after(., 'doi.org/'), //dc:identifier[starts-with(lower-case(.), 'doi:10.')]/substring-after(lower-case(.), 'doi:')))" />
      <xsl:for-each select="$varIdDoi">
        <oaf:identifier>
          <xsl:attribute name="identifierType" select="'doi'"/>
          <xsl:value-of select="."/>
        </oaf:identifier>
      </xsl:for-each>
      <!-- handles: text after 'hdl.handle.net/' of http identifiers -->
      <xsl:variable name="varIdHdl" select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))" />
      <xsl:for-each select="$varIdHdl">
        <oaf:identifier>
          <xsl:attribute name="identifierType" select="'handle'"/>
          <xsl:value-of select="."/>
        </oaf:identifier>
      </xsl:for-each>
      <!-- URNs: only values starting with 'urn:nbn:nl:' or 'URN:NBN:NL:' -->
      <xsl:variable name="varIdUrn" select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])" />
      <xsl:for-each select="$varIdUrn">
        <oaf:identifier>
          <xsl:attribute name="identifierType" select="'urn'"/>
          <xsl:value-of select="."/>
        </oaf:identifier>
      </xsl:for-each>
      <!-- the 2 comparison orders needed to work also for URL encoded baseURLs or item URLs -->
      <!-- baseURL of the innermost originDescription (the one without a nested originDescription) -->
      <xsl:variable name="varOrigBaseUrl" select="//*[local-name() = 'about']/*[local-name() = 'provenance']//*[local-name() = 'originDescription' and not(./*[local-name() = 'originDescription'])]/*[local-name() = 'baseURL']" />
      <!-- landing pages: identifiers whose host part occurs in that baseURL and whose
           extension is not listed in $varKnownFileEndings -->
      <xsl:variable name="varIdLdpg" select="distinct-values(//dc:identifier[(contains(substring-after(., '://'), '/') and contains($varOrigBaseUrl, substring-before(substring-after(., '://'), '/'))) or (contains(substring-after(., '://'), ':') and contains($varOrigBaseUrl, substring-before(substring-after(., '://'), ':')))][not(replace(lower-case(.), '.*(\.[a-z]*)$', '$1') = $varKnownFileEndings)])" />
      <xsl:for-each select="$varIdLdpg">
        <oaf:identifier>
          <xsl:attribute name="identifierType" select="'landingPage'"/>
          <xsl:value-of select="."/>
        </oaf:identifier>
      </xsl:for-each>
      <!-- remaining http identifiers: neither DOI or handle resolver URLs nor landing pages -->
      <xsl:variable name="varIdUrl" select="distinct-values(//dc:identifier[starts-with(., 'http')][not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/'))][count(index-of($varIdLdpg, .)) = 0])" />
      <xsl:for-each select="$varIdUrl">
        <oaf:identifier>
          <xsl:attribute name="identifierType" select="'url'"/>
          <xsl:value-of select="."/>
        </oaf:identifier>
      </xsl:for-each>
      <!-- identifier given by the innermost originDescription of the provenance section -->
      <oaf:identifier>
        <xsl:attribute name="identifierType" select="'oai-original'"/>
        <xsl:value-of select="//*[local-name() = 'about']/*[local-name() = 'provenance']//*[local-name() = 'originDescription' and not(./*[local-name() = 'originDescription'])]/*[local-name() = 'identifier']"/>
      </oaf:identifier>
    </metadata>
    <xsl:copy-of select="//*[local-name() = 'about']" />
  </record>
</xsl:template>
<!--
  allElements: writes one element per item of $sourceElement.
    sourceElement  sequence of nodes or atomic values to copy
    targetElement  lexical QName of the element to create (e.g. 'dc:title')
  Each created element holds the whitespace-normalised string value of its item;
  items that normalise to the empty string still produce an (empty) element.
-->
<xsl:template name="allElements">
  <xsl:param name="sourceElement"/>
  <xsl:param name="targetElement"/>
  <xsl:for-each select="$sourceElement">
    <xsl:variable name="normalizedText" select="normalize-space(.)"/>
    <xsl:element name="{$targetElement}">
      <xsl:value-of select="$normalizedText"/>
    </xsl:element>
  </xsl:for-each>
</xsl:template>
<!-- Header elements (matched by local name, whatever their namespace): the element is copied,
     its attributes and children are processed by the matching templates, and a
     dr:dateOfTransformation child holding $transDate is appended as last child. -->
<xsl:template match="//*[local-name() = 'header']">
  <xsl:copy>
    <xsl:apply-templates select="@* | node()"/>
    <xsl:element name="dr:dateOfTransformation">
      <xsl:value-of select="$transDate"/>
    </xsl:element>
  </xsl:copy>
</xsl:template>
<!--
<xsl:template match="dri:objIdentifier">
<xsl:variable name="objIdentifier" select="substring-after(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
<xsl:variable name="nsPrefix" select="key('kCodeByName', $repoCode, $vCodes)/@value"/>
<xsl:if test="string-length($nsPrefix) = 0">
<xsl:call-template name="terminate"/>
</xsl:if>
<xsl:copy>
<xsl:value-of select="concat( $nsPrefix , '::', transformExt:md5Hex(string($objIdentifier)))"/>
</xsl:copy>
</xsl:template>
-->
<!-- Identity transformation: any node or attribute without a more specific template
     is copied, and its attributes and children are processed recursively. -->
<xsl:template match="@* | node()">
  <xsl:copy>
    <xsl:apply-templates select="@* | node()"/>
  </xsl:copy>
</xsl:template>
</xsl:stylesheet>