fix base tr

This commit is contained in:
Michele Artini 2024-11-14 09:29:10 +01:00
parent ed560dacc0
commit c5b9a1592e
1 changed files with 493 additions and 424 deletions

View File

@ -1,6 +1,7 @@
<RESOURCE_PROFILE> <RESOURCE_PROFILE>
<HEADER> <HEADER>
<RESOURCE_IDENTIFIER value="2ad0cdd9-c96c-484c-8b0e-ed56d86891fe_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU=" /> <RESOURCE_IDENTIFIER
value="2ad0cdd9-c96c-484c-8b0e-ed56d86891fe_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
<RESOURCE_KIND value="TransformationRuleDSResources"/> <RESOURCE_KIND value="TransformationRuleDSResources"/>
<RESOURCE_URI value=""/> <RESOURCE_URI value=""/>
@ -14,23 +15,32 @@
<SCRIPT> <SCRIPT>
<TITLE>xslt_base2odf_hadoop</TITLE> <TITLE>xslt_base2odf_hadoop</TITLE>
<CODE> <CODE>
<xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO" xmlns:base_dc="http://oai.base-search.net/base_dc/" <xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:base_dc="http://oai.base-search.net/base_dc/"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:dc="http://purl.org/dc/elements/1.1/"
exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0"> exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0">
<xsl:param name="varOfficialName"/> <xsl:param name="varOfficialName"/>
<xsl:param name="varDataSourceId"/> <xsl:param name="varDataSourceId"/>
<xsl:param name="varFP7" select="'corda_______::'"/> <xsl:param name="varFP7" select="'corda_______::'"/>
<xsl:param name="varH2020" select="'corda__h2020::'"/> <xsl:param name="varH2020" select="'corda__h2020::'"/>
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" /> <xsl:param name="repoCode"
select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')"/>
<xsl:param name="index" select="0"/> <xsl:param name="index" select="0"/>
<xsl:param name="transDate" select="current-dateTime()"/> <xsl:param name="transDate" select="current-dateTime()"/>
<xsl:template name="terminate"> <xsl:template name="terminate">
<xsl:message terminate="yes"> <xsl:message terminate="yes"> record is not compliant, transformation is
record is not compliant, transformation is interrupted. interrupted. </xsl:message>
</xsl:message>
</xsl:template> </xsl:template>
<xsl:template match="/"> <xsl:template match="/">
@ -68,21 +78,24 @@
</datacite:identifier> </datacite:identifier>
</xsl:for-each> </xsl:for-each>
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))"> <xsl:for-each
<datacite:identifier alternateIdentifierType="handle"> select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
<datacite:identifier
alternateIdentifierType="handle">
<xsl:value-of select="."/> <xsl:value-of select="."/>
</datacite:identifier> </datacite:identifier>
</xsl:for-each> </xsl:for-each>
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])"> <xsl:for-each
<datacite:identifier alternateIdentifierType='urn'> select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
<datacite:identifier alternateIdentifierType="urn">
<xsl:value-of select="."/> <xsl:value-of select="."/>
</datacite:identifier> </datacite:identifier>
</xsl:for-each> </xsl:for-each>
<datacite:identifier alternateIdentifierType="oai-original"> <datacite:identifier
<xsl:value-of alternateIdentifierType="oai-original">
select="//oai:header/oai:identifier" /> <xsl:value-of select="//oai:header/oai:identifier"/>
</datacite:identifier> </datacite:identifier>
</datacite:alternateIdentifiers> </datacite:alternateIdentifiers>
@ -91,7 +104,11 @@
<xsl:for-each select="//base_dc:typenorm"> <xsl:for-each select="//base_dc:typenorm">
<datacite:resourceType><xsl:value-of select="vocabulary:clean(., 'base:normalized_types')" /></datacite:resourceType> <datacite:resourceType>
<xsl:value-of
select="vocabulary:clean(., 'base:normalized_types')"
/>
</datacite:resourceType>
</xsl:for-each> </xsl:for-each>
<datacite:titles> <datacite:titles>
@ -104,15 +121,20 @@
<datacite:creators> <datacite:creators>
<xsl:for-each select="//dc:creator"> <xsl:for-each select="//dc:creator">
<xsl:variable name="author" select="normalize-space(.)" /> <xsl:variable name="author"
select="normalize-space(.)"/>
<datacite:creator> <datacite:creator>
<datacite:creatorName> <datacite:creatorName>
<xsl:value-of select="$author"/> <xsl:value-of select="$author"/>
</datacite:creatorName> </datacite:creatorName>
<xsl:for-each select="//base_dc:authod_id[normalize-space(./base_dc:creator_name) = $author]/base_dc:creator_id "> <xsl:for-each
select="//base_dc:authod_id[normalize-space(./base_dc:creator_name) = $author]/base_dc:creator_id ">
<xsl:if test="contains(.,'https://orcid.org/')"> <xsl:if test="contains(.,'https://orcid.org/')">
<nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID"> <nameIdentifier schemeURI="https://orcid.org/"
<xsl:value-of select="substring-after(., 'https://orcid.org/')" /> nameIdentifierScheme="ORCID">
<xsl:value-of
select="substring-after(., 'https://orcid.org/')"
/>
</nameIdentifier> </nameIdentifier>
</xsl:if> </xsl:if>
</xsl:for-each> </xsl:for-each>
@ -145,8 +167,10 @@
</datacite:subject> </datacite:subject>
</xsl:for-each> </xsl:for-each>
<xsl:for-each select="//base_dc:classcode|//base_dc:autoclasscode"> <xsl:for-each
<datacite:subject subjectScheme="{@type}" classificationCode="{normalize-space(.)}"> select="//base_dc:classcode|//base_dc:autoclasscode">
<datacite:subject subjectScheme="{@type}"
classificationCode="{normalize-space(.)}">
<!-- TODO the value should be obtained by the Code --> <!-- TODO the value should be obtained by the Code -->
<xsl:value-of select="normalize-space(.)"/> <xsl:value-of select="normalize-space(.)"/>
</datacite:subject> </datacite:subject>
@ -174,10 +198,12 @@
</datacite:formats> </datacite:formats>
<datacite:language> <datacite:language>
<xsl:value-of select="vocabulary:clean( //base_dc:lang, 'dnet:languages')" /> <xsl:value-of
select="vocabulary:clean( //base_dc:lang, 'dnet:languages')"
/>
</datacite:language> </datacite:language>
<oaf:accessrights> <!--<datacite:rightsList>
<xsl:if test="//base_dc:oa[.='0']"> <xsl:if test="//base_dc:oa[.='0']">
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_16ec">restricted access</datacite:rights> <datacite:rights rightsURI="http://purl.org/coar/access_right/c_16ec">restricted access</datacite:rights>
</xsl:if> </xsl:if>
@ -185,21 +211,29 @@
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights> <datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
</xsl:if> </xsl:if>
<xsl:for-each select="//dc:rights|//base_dc:rightsnorm"> <xsl:for-each select="//dc:rights|//base_dc:rightsnorm">
<datacite:rights><xsl:value-of select="vocabulary:clean(., 'dnet:access_modes')" /></datacite:rights> <datacite:rights>
<xsl:value-of select="vocabulary:clean(., 'dnet:access_modes')"/>
</datacite:rights>
</xsl:for-each> </xsl:for-each>
</oaf:accessrights> </datacite:rightsList>-->
</datacite:resource> </datacite:resource>
<xsl:for-each select="//dc:relation"> <xsl:for-each select="//dc:relation">
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')"> <xsl:if
test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
<oaf:projectid> <oaf:projectid>
<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" /> <xsl:value-of
select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))"
/>
</oaf:projectid> </oaf:projectid>
</xsl:if> </xsl:if>
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')"> <xsl:if
test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
<oaf:projectid> <oaf:projectid>
<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" /> <xsl:value-of
select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))"
/>
</oaf:projectid> </oaf:projectid>
</xsl:if> </xsl:if>
</xsl:for-each> </xsl:for-each>
@ -209,68 +243,81 @@
<!-- Book part --> <!-- Book part -->
<xsl:when test="//base_dc:typenorm = '111'"> <xsl:when test="//base_dc:typenorm = '111'">
<dr:CobjCategory type="publication">0013</dr:CobjCategory> <dr:CobjCategory type="publication"
>0013</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Book --> <!-- Book -->
<xsl:when test="//base_dc:typenorm = '11'"> <xsl:when test="//base_dc:typenorm = '11'">
<dr:CobjCategory type="publication">0002</dr:CobjCategory> <dr:CobjCategory type="publication"
>0002</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Article contribution --> <!-- Article contribution -->
<xsl:when test="//base_dc:typenorm = '121'"> <xsl:when test="//base_dc:typenorm = '121'">
<dr:CobjCategory type="publication">0001</dr:CobjCategory> <dr:CobjCategory type="publication"
>0001</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Journal/Newspaper --> <!-- Journal/Newspaper -->
<xsl:when test="//base_dc:typenorm = '12'"> <xsl:when test="//base_dc:typenorm = '12'">
<dr:CobjCategory type="publication">0043</dr:CobjCategory> <dr:CobjCategory type="publication"
>0043</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Report --> <!-- Report -->
<xsl:when test="//base_dc:typenorm = '14'"> <xsl:when test="//base_dc:typenorm = '14'">
<dr:CobjCategory type="publication">0017</dr:CobjCategory> <dr:CobjCategory type="publication"
>0017</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Review --> <!-- Review -->
<xsl:when test="//base_dc:typenorm = '15'"> <xsl:when test="//base_dc:typenorm = '15'">
<dr:CobjCategory type="publication">0015</dr:CobjCategory> <dr:CobjCategory type="publication"
>0015</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Lecture --> <!-- Lecture -->
<xsl:when test="//base_dc:typenorm = '17'"> <xsl:when test="//base_dc:typenorm = '17'">
<dr:CobjCategory type="publication">0010</dr:CobjCategory> <dr:CobjCategory type="publication"
>0010</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Bachelor's thesis --> <!-- Bachelor's thesis -->
<xsl:when test="//base_dc:typenorm = '181'"> <xsl:when test="//base_dc:typenorm = '181'">
<dr:CobjCategory type="publication">0008</dr:CobjCategory> <dr:CobjCategory type="publication"
>0008</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Master's thesis --> <!-- Master's thesis -->
<xsl:when test="//base_dc:typenorm = '182'"> <xsl:when test="//base_dc:typenorm = '182'">
<dr:CobjCategory type="publication">0007</dr:CobjCategory> <dr:CobjCategory type="publication"
>0007</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Doctoral and postdoctoral thesis --> <!-- Doctoral and postdoctoral thesis -->
<xsl:when test="//base_dc:typenorm = '183'"> <xsl:when test="//base_dc:typenorm = '183'">
<dr:CobjCategory type="publication">0006</dr:CobjCategory> <dr:CobjCategory type="publication"
>0006</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Thesis --> <!-- Thesis -->
<xsl:when test="//base_dc:typenorm = '18'"> <xsl:when test="//base_dc:typenorm = '18'">
<dr:CobjCategory type="publication">0044</dr:CobjCategory> <dr:CobjCategory type="publication"
>0044</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Patent --> <!-- Patent -->
<xsl:when test="//base_dc:typenorm = '1A'"> <xsl:when test="//base_dc:typenorm = '1A'">
<dr:CobjCategory type="publication">0019</dr:CobjCategory> <dr:CobjCategory type="publication"
>0019</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Text --> <!-- Text -->
<xsl:when test="//base_dc:typenorm = '1'"> <xsl:when test="//base_dc:typenorm = '1'">
<dr:CobjCategory type="publication">0001</dr:CobjCategory> <dr:CobjCategory type="publication"
>0001</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Software --> <!-- Software -->
@ -315,22 +362,26 @@
<!-- Other non-article --> <!-- Other non-article -->
<xsl:when test="//base_dc:typenorm = '122'"> <xsl:when test="//base_dc:typenorm = '122'">
<dr:CobjCategory type="publication">0038</dr:CobjCategory> <dr:CobjCategory type="publication"
>0038</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Course material --> <!-- Course material -->
<xsl:when test="//base_dc:typenorm = '16'"> <xsl:when test="//base_dc:typenorm = '16'">
<dr:CobjCategory type="publication">0038</dr:CobjCategory> <dr:CobjCategory type="publication"
>0038</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Manuscript --> <!-- Manuscript -->
<xsl:when test="//base_dc:typenorm = '19'"> <xsl:when test="//base_dc:typenorm = '19'">
<dr:CobjCategory type="publication">0038</dr:CobjCategory> <dr:CobjCategory type="publication"
>0038</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Conference object --> <!-- Conference object -->
<xsl:when test="//base_dc:typenorm = '13'"> <xsl:when test="//base_dc:typenorm = '13'">
<dr:CobjCategory type="publication">0004</dr:CobjCategory> <dr:CobjCategory type="publication"
>0004</dr:CobjCategory>
</xsl:when> </xsl:when>
<!-- Unknown --> <!-- Unknown -->
@ -348,15 +399,23 @@
<xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when> <xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when>
<xsl:when test="//base_dc:oa[.='2']">UNKNOWN</xsl:when> <xsl:when test="//base_dc:oa[.='2']">UNKNOWN</xsl:when>
<xsl:when test="//base_dc:rightsnorm"> <xsl:when test="//base_dc:rightsnorm">
<xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')" /> <xsl:value-of
select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')"
/>
</xsl:when> </xsl:when>
<xsl:when test="//dc:rights"> <xsl:when test="//dc:rights">
<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" /> <xsl:value-of
select="vocabulary:clean( //dc:rights, 'dnet:access_modes')"
/>
</xsl:when> </xsl:when>
<xsl:otherwise>UNKNOWN</xsl:otherwise> <xsl:otherwise>UNKNOWN</xsl:otherwise>
</xsl:choose> </xsl:choose>
</oaf:accessrights> </oaf:accessrights>
<xsl:if test="//base_dc:rightsnorm">
<oaf:license><xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:licenses')" /></oaf:license>
</xsl:if>
<xsl:for-each select="//base_dc:doi"> <xsl:for-each select="//base_dc:doi">
<oaf:identifier identifierType="doi"> <oaf:identifier identifierType="doi">
<xsl:value-of select="."/> <xsl:value-of select="."/>
@ -370,21 +429,22 @@
</oaf:identifier> </oaf:identifier>
</xsl:for-each> </xsl:for-each>
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))"> <xsl:for-each
select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
<oaf:identifier identifierType="handle"> <oaf:identifier identifierType="handle">
<xsl:value-of select="."/> <xsl:value-of select="."/>
</oaf:identifier> </oaf:identifier>
</xsl:for-each> </xsl:for-each>
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])"> <xsl:for-each
<oaf:identifier identifierType='urn'> select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
<oaf:identifier identifierType="urn">
<xsl:value-of select="."/> <xsl:value-of select="."/>
</oaf:identifier> </oaf:identifier>
</xsl:for-each> </xsl:for-each>
<oaf:identifier identifierType="oai-original"> <oaf:identifier identifierType="oai-original">
<xsl:value-of <xsl:value-of select="//oai:header/oai:identifier"/>
select="//oai:header/oai:identifier" />
</oaf:identifier> </oaf:identifier>
<oaf:hostedBy> <oaf:hostedBy>
@ -392,7 +452,9 @@
<xsl:value-of select="//base_dc:collname"/> <xsl:value-of select="//base_dc:collname"/>
</xsl:attribute> </xsl:attribute>
<xsl:attribute name="id"> <xsl:attribute name="id">
<xsl:value-of select="concat('opendoar____::', //base_dc:collection/@opendoar_id)" /> <xsl:value-of
select="concat('opendoar____::', //base_dc:collection/@opendoar_id)"
/>
</xsl:attribute> </xsl:attribute>
</oaf:hostedBy> </oaf:hostedBy>
@ -418,13 +480,19 @@
</xsl:if> </xsl:if>
<xsl:for-each select="//base_dc:collection/@ror_id"> <xsl:for-each select="//base_dc:collection/@ror_id">
<oaf:relation relType="resultOrganization" subRelType="affiliation" relClass="hasAuthorInstitution" targetType="organization"> <oaf:relation relType="resultOrganization"
subRelType="affiliation" relClass="hasAuthorInstitution"
targetType="organization">
<xsl:choose> <xsl:choose>
<xsl:when test="contains(.,'https://ror.org/')"> <xsl:when test="contains(.,'https://ror.org/')">
<xsl:value-of select="concat('ror_________::', normalize-space(.))" /> <xsl:value-of
select="concat('ror_________::', normalize-space(.))"
/>
</xsl:when> </xsl:when>
<xsl:otherwise> <xsl:otherwise>
<xsl:value-of select="concat('ror_________::https://ror.org/', normalize-space(.))" /> <xsl:value-of
select="concat('ror_________::https://ror.org/', normalize-space(.))"
/>
</xsl:otherwise> </xsl:otherwise>
</xsl:choose> </xsl:choose>
</oaf:relation> </oaf:relation>
@ -435,7 +503,8 @@
<oaf:deletedbyinference>false</oaf:deletedbyinference> <oaf:deletedbyinference>false</oaf:deletedbyinference>
<oaf:trust>0.89</oaf:trust> <oaf:trust>0.89</oaf:trust>
<oaf:inferenceprovenance/> <oaf:inferenceprovenance/>
<oaf:provenanceaction classid="sysimport:crosswalk:aggregator" <oaf:provenanceaction
classid="sysimport:crosswalk:aggregator"
classname="sysimport:crosswalk:aggregator" classname="sysimport:crosswalk:aggregator"
schemeid="dnet:provenanceActions" schemeid="dnet:provenanceActions"
schemename="dnet:provenanceActions"/> schemename="dnet:provenanceActions"/>