dnet-hadoop/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/plugin/base/xml/base2oaf.transformationRule...

432 lines
17 KiB
XML

<RESOURCE_PROFILE>
<HEADER>
<RESOURCE_IDENTIFIER value="" />
<RESOURCE_TYPE value="TransformationRuleDSResourceType" />
<RESOURCE_KIND value="TransformationRuleDSResources" />
<RESOURCE_URI value="" />
<DATE_OF_CREATION value="2024-03-05T11:23:00+00:00" />
</HEADER>
<BODY>
<CONFIGURATION>
<SOURCE_METADATA_FORMAT interpretation="cleaned" layout="store" name="dc" />
<SINK_METADATA_FORMAT name="oaf_hbase" />
<IMPORTED />
<SCRIPT>
<TITLE>xslt_base2oaf_hadoop</TITLE>
<CODE>
<xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:base_dc="http://oai.base-search.net/base_dc/"
xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/"
exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0">
<xsl:param name="varOfficialName" />
<xsl:param name="varDataSourceId" />
<xsl:param name="varFP7" select="'corda_______::'" />
<xsl:param name="varH2020" select="'corda__h2020::'" />
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
<xsl:param name="index" select="0" />
<xsl:param name="transDate" select="current-dateTime()" />
<xsl:template name="terminate">
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<xsl:template match="/">
<record>
<xsl:apply-templates select="//*[local-name() = 'header']" />
<!-- TO EVALUATE
base_dc:authod_id
base_dc:authod_id/base_dc:creator_id
base_dc:authod_id/base_dc:creator_name
example:
<dc:creator>ALBU, Svetlana</dc:creator>
<base_dc:authod_id>
<base_dc:creator_name>ALBU, Svetlana</base_dc:creator_name>
<base_dc:creator_id>https://orcid.org/0000-0002-8648-950X</base_dc:creator_id>
</base_dc:authod_id>
-->
<!-- NOT USED
base_dc:global_id (I used oai:identifier)
base_dc:collection/text()
base_dc:continent
base_dc:country
base_dc:year (I used dc:date)
dc:coverage
dc:language (I used base_dc:lang)
base_dc:link (I used dc:identifier)
-->
<metadata>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:title" />
<xsl:with-param name="targetElement" select="'dc:title'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:creator/replace(., '^(.*)\|.*$', '$1')" />
<xsl:with-param name="targetElement" select="'dc:creator'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:contributor" />
<xsl:with-param name="targetElement" select="'dc:contributor'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:description" />
<xsl:with-param name="targetElement" select="'dc:description'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:subject" />
<xsl:with-param name="targetElement" select="'dc:subject'" />
</xsl:call-template>
<!-- TODO: I'm not sure if this is the correct encoding -->
<xsl:for-each select="//base_dc:classcode|//base_dc:autoclasscode">
<dc:subject><xsl:value-of select="concat(@type, ':', .)" /></dc:subject>
</xsl:for-each>
<!-- END TODO -->
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:publisher" />
<xsl:with-param name="targetElement" select="'dc:publisher'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:format" />
<xsl:with-param name="targetElement" select="'dc:format'" />
</xsl:call-template>
<xsl:for-each select="//base_dc:typenorm">
<dc:type>
<xsl:value-of select="vocabulary:clean(., 'base:normalized_types')" />
</dc:type>
</xsl:for-each>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:type" />
<xsl:with-param name="targetElement" select="'dc:type'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:source" />
<xsl:with-param name="targetElement" select="'dc:source'" />
</xsl:call-template>
<dc:language>
<xsl:value-of select="vocabulary:clean( //base_dc:lang, 'dnet:languages')" />
</dc:language>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:rights" />
<xsl:with-param name="targetElement" select="'dc:rights'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:relation" />
<xsl:with-param name="targetElement" select="'dc:relation'" />
</xsl:call-template>
<xsl:if test="not(//dc:identifier[starts-with(., 'http')])">
<xsl:call-template name="terminate" />
</xsl:if>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:identifier[starts-with(., 'http')]" />
<xsl:with-param name="targetElement" select="'dc:identifier'" />
</xsl:call-template>
<xsl:for-each select="//dc:relation">
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
<oaf:projectid>
<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
</oaf:projectid>
</xsl:if>
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
<oaf:projectid>
<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<xsl:choose>
<!-- I used an inline mapping because the field typenorm could be repeated and I have to specify a list of priority -->
<!-- Book part -->
<xsl:when test="//base_dc:typenorm = '111'">
<dr:CobjCategory type="publication">0013</dr:CobjCategory>
</xsl:when>
<!-- Book -->
<xsl:when test="//base_dc:typenorm = '11'">
<dr:CobjCategory type="publication">0002</dr:CobjCategory>
</xsl:when>
<!-- Article contribution -->
<xsl:when test="//base_dc:typenorm = '121'">
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
</xsl:when>
<!-- Journal/Newspaper -->
<xsl:when test="//base_dc:typenorm = '12'">
<dr:CobjCategory type="publication">0043</dr:CobjCategory>
</xsl:when>
<!-- Report -->
<xsl:when test="//base_dc:typenorm = '14'">
<dr:CobjCategory type="publication">0017</dr:CobjCategory>
</xsl:when>
<!-- Review -->
<xsl:when test="//base_dc:typenorm = '15'">
<dr:CobjCategory type="publication">0015</dr:CobjCategory>
</xsl:when>
<!-- Lecture -->
<xsl:when test="//base_dc:typenorm = '17'">
<dr:CobjCategory type="publication">0010</dr:CobjCategory>
</xsl:when>
<!-- Bachelor's thesis -->
<xsl:when test="//base_dc:typenorm = '181'">
<dr:CobjCategory type="publication">0008</dr:CobjCategory>
</xsl:when>
<!-- Master's thesis -->
<xsl:when test="//base_dc:typenorm = '182'">
<dr:CobjCategory type="publication">0007</dr:CobjCategory>
</xsl:when>
<!-- Doctoral and postdoctoral thesis -->
<xsl:when test="//base_dc:typenorm = '183'">
<dr:CobjCategory type="publication">0006</dr:CobjCategory>
</xsl:when>
<!-- Thesis -->
<xsl:when test="//base_dc:typenorm = '18'">
<dr:CobjCategory type="publication">0044</dr:CobjCategory>
</xsl:when>
<!-- Patent -->
<xsl:when test="//base_dc:typenorm = '1A'">
<dr:CobjCategory type="publication">0019</dr:CobjCategory>
</xsl:when>
<!-- Text -->
<xsl:when test="//base_dc:typenorm = '1'">
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
</xsl:when>
<!-- Software -->
<xsl:when test="//base_dc:typenorm = '6'">
<dr:CobjCategory type="software">0029</dr:CobjCategory>
</xsl:when>
<!-- Dataset -->
<xsl:when test="//base_dc:typenorm = '7'">
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
</xsl:when>
<!-- Still image -->
<xsl:when test="//base_dc:typenorm = '51'">
<dr:CobjCategory type="other">0025</dr:CobjCategory>
</xsl:when>
<!-- Moving image/Video -->
<xsl:when test="//base_dc:typenorm = '52'">
<dr:CobjCategory type="other">0024</dr:CobjCategory>
</xsl:when>
<!-- Image/Video -->
<xsl:when test="//base_dc:typenorm = '5'">
<dr:CobjCategory type="other">0033</dr:CobjCategory>
</xsl:when>
<!-- Audio -->
<xsl:when test="//base_dc:typenorm = '4'">
<dr:CobjCategory type="other">0030</dr:CobjCategory>
</xsl:when>
<!-- Musical notation -->
<xsl:when test="//base_dc:typenorm = '2'">
<dr:CobjCategory type="other">0020</dr:CobjCategory>
</xsl:when>
<!-- Map -->
<xsl:when test="//base_dc:typenorm = '3'">
<dr:CobjCategory type="other">0020</dr:CobjCategory>
</xsl:when>
<!-- Other non-article -->
<xsl:when test="//base_dc:typenorm = '122'">
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
</xsl:when>
<!-- Course material -->
<xsl:when test="//base_dc:typenorm = '16'">
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
</xsl:when>
<!-- Manuscript -->
<xsl:when test="//base_dc:typenorm = '19'">
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
</xsl:when>
<!-- Conference object -->
<xsl:when test="//base_dc:typenorm = '13'">
<dr:CobjCategory type="publication">0004</dr:CobjCategory>
</xsl:when>
<!-- Unknown -->
<xsl:when test="//base_dc:typenorm = 'F'">
<dr:CobjCategory type="other">0000</dr:CobjCategory>
</xsl:when>
<xsl:otherwise>
<dr:CobjCategory type="other">0000</dr:CobjCategory>
</xsl:otherwise>
</xsl:choose>
<oaf:accessrights>
<xsl:choose>
<xsl:when test="//base_dc:oa[.='0']">CLOSED</xsl:when>
<xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when>
<xsl:when test="//base_dc:oa[.='2']">UNKNOWN</xsl:when>
<xsl:when test="//base_dc:rightsnorm">
<xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')" />
</xsl:when>
<xsl:when test="//dc:rights">
<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
</xsl:when>
<xsl:otherwise>UNKNOWN</xsl:otherwise>
</xsl:choose>
</oaf:accessrights>
<xsl:for-each select="//base_dc:doi">
<oaf:identifier identifierType="doi">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and (not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
<oaf:identifier identifierType="url">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
<oaf:identifier identifierType="handle">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
<oaf:identifier identifierType='urn'>
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>
<oaf:identifier identifierType="oai-original">
<xsl:value-of
select="//oai:header/oai:identifier" />
</oaf:identifier>
<oaf:hostedBy>
<xsl:attribute name="name">
<xsl:value-of select="//base_dc:collname" />
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="concat('opendoar____::', //base_dc:collection/@opendoar_id)" />
</xsl:attribute>
</oaf:hostedBy>
<oaf:collectedFrom>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName" />
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId" />
</xsl:attribute>
</oaf:collectedFrom>
<oaf:dateAccepted>
<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
</oaf:dateAccepted>
<xsl:if test="//base_dc:oa[.='1']">
<xsl:for-each select="//dc:relation[starts-with(., 'http')]">
<oaf:fulltext>
<xsl:value-of select="normalize-space(.)" />
</oaf:fulltext>
</xsl:for-each>
</xsl:if>
<xsl:for-each select="//base_dc:collection/@ror_id">
<oaf:relation relType="resultOrganization"
subRelType="affiliation"
relClass="hasAuthorInstitution"
targetType="organization">
<xsl:choose>
<xsl:when test="contains(.,'https://ror.org/')">
<xsl:value-of select="concat('ror_________::', normalize-space(.))" />
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="concat('ror_________::https://ror.org/', normalize-space(.))" />
</xsl:otherwise>
</xsl:choose>
</oaf:relation>
</xsl:for-each>
</metadata>
<xsl:copy-of select="//*[local-name() = 'about']" />
</record>
</xsl:template>
<xsl:template name="allElements">
<xsl:param name="sourceElement" />
<xsl:param name="targetElement" />
<xsl:for-each select="$sourceElement">
<xsl:element name="{$targetElement}">
<xsl:value-of select="normalize-space(.)" />
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template match="//*[local-name() = 'header']">
<xsl:if test="//oai:header/@status='deleted'">
<xsl:call-template name="terminate" />
</xsl:if>
<xsl:copy>
<xsl:apply-templates select="node()|@*" />
<xsl:element name="dr:dateOfTransformation">
<xsl:value-of select="$transDate" />
</xsl:element>
</xsl:copy>
</xsl:template>
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*" />
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
</CODE>
</SCRIPT>
</CONFIGURATION>
<STATUS />
<SECURITY_PARAMETERS />
</BODY>
</RESOURCE_PROFILE>