improved parallelization on transformation job

This commit is contained in:
Sandro La Bruzzo 2021-04-19 15:14:52 +02:00
parent 3ae67b7a1d
commit cdfe01bbae
7 changed files with 98 additions and 89 deletions

View File

@ -11,6 +11,9 @@ import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@ -26,6 +29,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -33,6 +37,8 @@ public class TransformSparkJobNode {
private static final Logger log = LoggerFactory.getLogger(TransformSparkJobNode.class); private static final Logger log = LoggerFactory.getLogger(TransformSparkJobNode.class);
// Default number of records handled per task; used as the fallback when the optional
// "recordsPerTask" argument is not supplied.
// NOTE(review): not reassigned in the visible code — consider declaring it final; confirm against the full class.
private static int RECORDS_PER_TASK = 200;
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -67,6 +73,11 @@ public class TransformSparkJobNode {
final String dateOfTransformation = parser.get("dateOfTransformation"); final String dateOfTransformation = parser.get("dateOfTransformation");
log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); log.info(String.format("dateOfTransformation: %s", dateOfTransformation));
final Integer rpt = Optional
.ofNullable(parser.get("recordsPerTask"))
.map(Integer::valueOf)
.orElse(RECORDS_PER_TASK);
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl); final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService); final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService);
@ -79,12 +90,12 @@ public class TransformSparkJobNode {
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
transformRecords( transformRecords(
parser.getObjectMap(), isLookupService, spark, inputPath, outputBasePath); parser.getObjectMap(), isLookupService, spark, inputPath, outputBasePath, rpt);
}); });
} }
public static void transformRecords(final Map<String, String> args, final ISLookUpService isLookUpService, public static void transformRecords(final Map<String, String> args, final ISLookUpService isLookUpService,
final SparkSession spark, final String inputPath, final String outputBasePath) final SparkSession spark, final String inputPath, final String outputBasePath, final Integer rpt)
throws DnetTransformationException, IOException { throws DnetTransformationException, IOException {
final LongAccumulator totalItems = spark.sparkContext().longAccumulator(CONTENT_TOTALITEMS); final LongAccumulator totalItems = spark.sparkContext().longAccumulator(CONTENT_TOTALITEMS);
@ -99,18 +110,25 @@ public class TransformSparkJobNode {
final String workflowId = args.get("workflowId"); final String workflowId = args.get("workflowId");
log.info("workflowId is {}", workflowId); log.info("workflowId is {}", workflowId);
final MessageSender messageSender = new MessageSender(dnetMessageManagerURL, workflowId); MapFunction<MetadataRecord, MetadataRecord> x = TransformationFactory
try (AggregatorReport report = new AggregatorReport(messageSender)) { .getTransformationPlugin(args, ct, isLookUpService);
try {
final Dataset<MetadataRecord> mdstore = spark final Dataset<MetadataRecord> inputMDStore = spark
.read() .read()
.format("parquet") .format("parquet")
.load(inputPath) .load(inputPath)
.as(encoder) .as(encoder);
.map(
TransformationFactory.getTransformationPlugin(args, ct, isLookUpService), final long totalInput = inputMDStore.count();
encoder);
saveDataset(mdstore, outputBasePath + MDSTORE_DATA_PATH); final MessageSender messageSender = new MessageSender(dnetMessageManagerURL, workflowId);
try (AggregatorReport report = new AggregatorReport(messageSender)) {
try {
JavaRDD<MetadataRecord> mdstore = inputMDStore
.javaRDD()
.repartition(getRepartitionNumber(totalInput, rpt))
.map((Function<MetadataRecord, MetadataRecord>) x::call);
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Transformed item " + ct.getProcessedItems().count());
log.info("Total item " + ct.getTotalItems().count()); log.info("Total item " + ct.getTotalItems().count());

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.transformation.xslt; package eu.dnetlib.dhp.transformation.xslt;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.Serializable;
import java.io.StringWriter; import java.io.StringWriter;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
@ -15,7 +16,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import net.sf.saxon.s9api.*; import net.sf.saxon.s9api.*;
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord> { public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord>, Serializable {
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform"; public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform";
@ -27,6 +28,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
private final long dateOfTransformation; private final long dateOfTransformation;
private final VocabularyGroup vocabularies;
public XSLTTransformationFunction( public XSLTTransformationFunction(
final AggregationCounter aggregationCounter, final AggregationCounter aggregationCounter,
final String transformationRule, final String transformationRule,
@ -35,6 +38,7 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
throws Exception { throws Exception {
this.aggregationCounter = aggregationCounter; this.aggregationCounter = aggregationCounter;
this.transformationRule = transformationRule; this.transformationRule = transformationRule;
this.vocabularies = vocabularies;
this.dateOfTransformation = dateOfTransformation; this.dateOfTransformation = dateOfTransformation;
cleanFunction = new Cleaner(vocabularies); cleanFunction = new Cleaner(vocabularies);
} }
@ -73,4 +77,24 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
/**
 * Returns the aggregation counter that was passed to the constructor.
 *
 * @return the {@link AggregationCounter} held by this function
 */
public AggregationCounter getAggregationCounter() {
	return this.aggregationCounter;
}
/**
 * Returns the transformation rule that was passed to the constructor.
 *
 * @return the transformation rule as a string
 */
public String getTransformationRule() {
	return this.transformationRule;
}
/**
 * Returns the cleaner that the constructor builds from the supplied vocabularies.
 *
 * @return the {@link Cleaner} held by this function
 */
public Cleaner getCleanFunction() {
	return this.cleanFunction;
}
/**
 * Returns the date-of-transformation value that was passed to the constructor.
 *
 * @return the stored transformation date as a {@code long}
 */
public long getDateOfTransformation() {
	return this.dateOfTransformation;
}
/**
 * Returns the vocabulary group that was passed to the constructor.
 *
 * @return the {@link VocabularyGroup} held by this function
 */
public VocabularyGroup getVocabularies() {
	return this.vocabularies;
}
} }

View File

@ -37,6 +37,12 @@
<name>dnetMessageManagerURL</name> <name>dnetMessageManagerURL</name>
<description>The URI of the Dnet Message Manager</description> <description>The URI of the Dnet Message Manager</description>
</property> </property>
<property>
<name>recordsPerTask</name>
<value>200</value>
<description>The number of records transformed by a single task</description>
</property>
</parameters> </parameters>
<start to="BeginRead"/> <start to="BeginRead"/>
@ -103,6 +109,7 @@
<arg>--transformationPlugin</arg><arg>${transformationPlugin}</arg> <arg>--transformationPlugin</arg><arg>${transformationPlugin}</arg>
<arg>--transformationRuleId</arg><arg>${transformationRuleId}</arg> <arg>--transformationRuleId</arg><arg>${transformationRuleId}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--recordsPerTask</arg><arg>${recordsPerTask}</arg>
<arg>--workflowId</arg><arg>${workflowId}</arg> <arg>--workflowId</arg><arg>${workflowId}</arg>
<arg>--dnetMessageManagerURL</arg><arg>${dnetMessageManagerURL}</arg> <arg>--dnetMessageManagerURL</arg><arg>${dnetMessageManagerURL}</arg>
</spark> </spark>

View File

@ -48,6 +48,12 @@
"paramDescription": "the identifier of the dnet Workflow", "paramDescription": "the identifier of the dnet Workflow",
"paramRequired": true "paramRequired": true
}, },
{
"paramName": "rpt",
"paramLongName": "recordsPerTask",
"paramDescription": "the number of records transformed by a single Task",
"paramRequired": false
},
{ {
"paramName": "tp", "paramName": "tp",
"paramLongName": "transformationPlugin", "paramLongName": "transformationPlugin",

View File

@ -180,7 +180,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest {
TransformSparkJobNode TransformSparkJobNode
.transformRecords( .transformRecords(
parameters, isLookUpService, spark, mdStoreV2.getHdfsPath() + MDSTORE_DATA_PATH, parameters, isLookUpService, spark, mdStoreV2.getHdfsPath() + MDSTORE_DATA_PATH,
mdStoreCleanedVersion.getHdfsPath()); mdStoreCleanedVersion.getHdfsPath(), 200);
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class); final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
final Dataset<MetadataRecord> mOutput = spark final Dataset<MetadataRecord> mOutput = spark

View File

@ -167,7 +167,8 @@ public class TransformationJobTest extends AbstractVocabularyTest {
}).collect(Collectors.toMap(data -> data[0], data -> data[1])); }).collect(Collectors.toMap(data -> data[0], data -> data[1]));
TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output); TransformSparkJobNode
.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output, 200);
// TODO introduce useful assertions // TODO introduce useful assertions
@ -221,7 +222,8 @@ public class TransformationJobTest extends AbstractVocabularyTest {
}).collect(Collectors.toMap(data -> data[0], data -> data[1])); }).collect(Collectors.toMap(data -> data[0], data -> data[1]));
TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output); TransformSparkJobNode
.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output, 200);
// TODO introduce useful assertions // TODO introduce useful assertions

View File

@ -1,16 +1,13 @@
<xsl:stylesheet <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dr="http://www.driver-repository.eu/namespace/dr"
exclude-result-prefixes="xsl vocabulary dateCleaner"> exclude-result-prefixes="xsl vocabulary dateCleaner" version="2.0">
<xsl:param name="varOfficialName"/> <xsl:param name="varOfficialName"/>
<xsl:param name="varDsType"/> <xsl:param name="varDsType"/>
<xsl:param name="varDataSourceId"/> <xsl:param name="varDataSourceId"/>
<xsl:param name="varFP7" select="'corda_______::'"/> <xsl:param name="varFP7" select="'corda_______::'"/>
<xsl:param name="varH2020" select="'corda__h2020::'"/> <xsl:param name="varH2020" select="'corda__h2020::'"/>
<xsl:param name="varAKA" select="'aka_________::'"/> <xsl:param name="varAKA" select="'aka_________::'"/>
@ -34,29 +31,22 @@
<xsl:param name="varTARA" select="'taraexp_____::'"/><!-- TARA awaiting DOI from André --> <xsl:param name="varTARA" select="'taraexp_____::'"/><!-- TARA awaiting DOI from André -->
<xsl:param name="varTUBITAK" select="'tubitakf____::'"/> <xsl:param name="varTUBITAK" select="'tubitakf____::'"/>
<xsl:param name="varWT" select="'wt__________::'"/> <xsl:param name="varWT" select="'wt__________::'"/>
<xsl:param name="index" select="0"/> <xsl:param name="index" select="0"/>
<xsl:param name="transDate" select="current-dateTime()"/> <xsl:param name="transDate" select="current-dateTime()"/>
<xsl:template match="/"> <xsl:template match="/">
<xsl:variable name="datasourcePrefix" select="normalize-space(//oaf:datasourceprefix)"/> <xsl:variable name="datasourcePrefix" select="normalize-space(//oaf:datasourceprefix)"/>
<xsl:call-template name="validRecord"/> <xsl:call-template name="validRecord"/>
</xsl:template> </xsl:template>
<xsl:template name="validRecord"> <xsl:template name="validRecord">
<record> <record>
<xsl:apply-templates select="//*[local-name() = 'header']"/> <xsl:apply-templates select="//*[local-name() = 'header']"/>
<metadata> <metadata>
<xsl:apply-templates select="//*[local-name() = 'metadata']//*[local-name() = 'resource']"/> <xsl:apply-templates select="//*[local-name() = 'metadata']//*[local-name() = 'resource']"/>
<xsl:if test="//*[local-name()='date']/@dateType='Available'"> <xsl:if test="//*[local-name()='date']/@dateType='Available'">
<xsl:variable name='varEmbargoEndDate' <xsl:variable name="varEmbargoEndDate"
select="dateCleaner:dateISO(normalize-space(//*[local-name()='date'][@dateType='Available']))"/> select="dateCleaner:dateISO(normalize-space(//*[local-name()='date'][@dateType='Available']))"/>
<xsl:choose> <xsl:choose>
<xsl:when test="string-length($varEmbargoEndDate) > 0"> <xsl:when test="string-length($varEmbargoEndDate) &gt; 0">
<oaf:embargoenddate> <oaf:embargoenddate>
<xsl:value-of select="$varEmbargoEndDate"/> <xsl:value-of select="$varEmbargoEndDate"/>
</oaf:embargoenddate> </oaf:embargoenddate>
@ -68,55 +58,44 @@
</xsl:otherwise> </xsl:otherwise>
</xsl:choose> </xsl:choose>
</xsl:if> </xsl:if>
<dr:CobjCategory> <dr:CobjCategory>
<xsl:variable name="varCobjCategory" <xsl:variable name="varCobjCategory"
select="vocabulary:clean( //*[local-name()='resourceType']/@resourceTypeGeneral, 'dnet:publication_resource')"/> select="vocabulary:clean( //*[local-name()='resourceType']/@resourceTypeGeneral, 'dnet:publication_resource')"/>
<xsl:variable name="varSuperType" <xsl:variable name="varSuperType"
select="vocabulary:clean( $varCobjCategory, 'dnet:result_typologies')"/> select="vocabulary:clean( $varCobjCategory, 'dnet:result_typologies')"/>
<xsl:attribute name="type"> <xsl:attribute name="type">
<xsl:value-of select="$varSuperType"/> <xsl:value-of select="$varSuperType"/>
</xsl:attribute> </xsl:attribute>
<xsl:value-of select="$varCobjCategory"/> <xsl:value-of select="$varCobjCategory"/>
</dr:CobjCategory> </dr:CobjCategory><!-- review status --><!-- Zenodo -->
<xsl:variable name="varRefereedConvt"
<!-- review status --> select="for $i in (//*[local-name()='resourceType']/(., @resourceTypeGeneral), //*[local-name()='version']) return vocabulary:clean(normalize-space($i), 'dnet:review_levels')"/>
<!-- Zenodo -->
<xsl:variable name="varRefereedConvt" select="for $i in (//*[local-name()='resourceType']/(., @resourceTypeGeneral), //*[local-name()='version'])
return vocabulary:clean(normalize-space($i), 'dnet:review_levels')"/>
<xsl:variable name="varRefereedIdntf" <xsl:variable name="varRefereedIdntf"
select="//*[local-name()=('identifier', 'alternateIdentifier')][matches(lower-case(.), '.*([\s\-\.\\_/:]|%[2-7][0-9A-F])pre([\s\-\.\\_/:]|%[2-7][0-9A-F])?prints?([\s\-\.\\_/:%].*|$)')]/'0002' "/> select="//*[local-name()=('identifier', 'alternateIdentifier')][matches(lower-case(.), '.*([\s\-\.\\_/:]|%[2-7][0-9A-F])pre([\s\-\.\\_/:]|%[2-7][0-9A-F])?prints?([\s\-\.\\_/:%].*|$)')]/'0002' "/>
<xsl:variable name="varRefereedReltn" <xsl:variable name="varRefereedReltn"
select="//*[local-name()='relatedIdentifier'][./@relationType/lower-case(.)='isreviewedby']/'0001' "/> select="//*[local-name()='relatedIdentifier'][./@relationType/lower-case(.)='isreviewedby']/'0001' "/>
<xsl:variable name="varRefereedVersn" select="(//*[local-name()='version'][matches(lower-case(.), '.*peer[\s\-\.\\_/:%]?reviewed.*')]/'0001', <xsl:variable name="varRefereedVersn"
//*[local-name()='version'][matches(normalize-space(lower-case(.)), '^(v|vs|version|rel|release)?[\s\.\-_]*0$')]/'0002', select="(//*[local-name()='version'][matches(lower-case(.), '.*peer[\s\-\.\\_/:%]?reviewed.*')]/'0001', //*[local-name()='version'][matches(normalize-space(lower-case(.)), '^(v|vs|version|rel|release)?[\s\.\-_]*0$')]/'0002', //*[local-name()='version'][matches(lower-case(.), '(^|[\s\-\.\\_/:%].*)(beta|draft|trial|test)([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='version'][matches(lower-case(.), '.*submi(tted|ssion|ttal).*')]/'0002') "/>
//*[local-name()='version'][matches(lower-case(.), '(^|[\s\-\.\\_/:%].*)(beta|draft|trial|test)([\s\-\.\\_/:%].*|$)')]/'0002', <xsl:variable name="varRefereedOther"
//*[local-name()='version'][matches(lower-case(.), '.*submi(tted|ssion|ttal).*')]/'0002') "/> select="(//*[local-name()='publisher'][matches(lower-case(.), '.*[\s\-\.\\_/:%]pre[\s\-\.\\_/:%]?prints?([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='description'][matches(lower-case(.), '^peer[\s\-\.\\_/:%]?reviewed$')]/'0001', //*[local-name()='description'][matches(lower-case(.), '^pre[\s\-\.\\_/:%]?prints?$')]/'0002') "/>
<xsl:variable name="varRefereedOther" select="(//*[local-name()='publisher'][matches(lower-case(.), '.*[\s\-\.\\_/:%]pre[\s\-\.\\_/:%]?prints?([\s\-\.\\_/:%].*|$)')]/'0002',
//*[local-name()='description'][matches(lower-case(.), '^peer[\s\-\.\\_/:%]?reviewed$')]/'0001',
//*[local-name()='description'][matches(lower-case(.), '^pre[\s\-\.\\_/:%]?prints?$')]/'0002') "/>
<xsl:variable name="varRefereed" <xsl:variable name="varRefereed"
select="($varRefereedConvt, $varRefereedIdntf, $varRefereedReltn, $varRefereedVersn, $varRefereedOther)"/> select="($varRefereedConvt, $varRefereedIdntf, $varRefereedReltn, $varRefereedVersn, $varRefereedOther)"/>
<xsl:choose> <xsl:choose>
<xsl:when test="count($varRefereed[. = '0001']) > 0"> <xsl:when test="count($varRefereed[. = '0001']) &gt; 0">
<oaf:refereed> <oaf:refereed>
<xsl:value-of select="'0001'"/> <xsl:value-of select="'0001'"/>
</oaf:refereed> </oaf:refereed>
</xsl:when> </xsl:when>
<xsl:when test="count($varRefereed[. = '0002']) > 0"> <xsl:when test="count($varRefereed[. = '0002']) &gt; 0">
<oaf:refereed> <oaf:refereed>
<xsl:value-of select="'0002'"/> <xsl:value-of select="'0002'"/>
</oaf:refereed> </oaf:refereed>
</xsl:when> </xsl:when>
</xsl:choose> </xsl:choose>
<oaf:dateAccepted> <oaf:dateAccepted>
<xsl:value-of <xsl:value-of select="dateCleaner:dateISO(normalize-space(//*[local-name()='publicationYear']))"/>
select="dateCleaner:dateISO(normalize-space(//*[local-name()='publicationYear']))"/>
</oaf:dateAccepted> </oaf:dateAccepted>
<xsl:choose> <xsl:choose>
<xsl:when <xsl:when
test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'info:eu-repo/semantics')]"> test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'info:eu-repo/semantics')]">
<oaf:accessrights> <oaf:accessrights>
@ -147,17 +126,12 @@
</xsl:choose> </xsl:choose>
</xsl:otherwise> </xsl:otherwise>
</xsl:choose> </xsl:choose>
<oaf:language> <oaf:language>
<xsl:value-of <xsl:value-of select="vocabulary:clean(//*[local-name()='language'], 'dnet:languages')"/>
select="vocabulary:clean(//*[local-name()='language'], 'dnet:languages')"/>
</oaf:language> </oaf:language>
<xsl:for-each <xsl:for-each
select="//*[local-name()='nameIdentifier'][contains(., 'info:eu-repo/grantAgreement/')], //*[local-name()='fundingReference']/*[local-name()='awardNumber']"> select="//*[local-name()='nameIdentifier'][contains(., 'info:eu-repo/grantAgreement/')], //*[local-name()='fundingReference']/*[local-name()='awardNumber']">
<xsl:choose> <xsl:choose>
<xsl:when <xsl:when
test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i') or ../*[local-name() = 'funderIdentifier' and . = '10.13039/100011102']"> test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i') or ../*[local-name() = 'funderIdentifier' and . = '10.13039/100011102']">
<oaf:projectid> <oaf:projectid>
@ -308,13 +282,10 @@
select="concat($varWT, replace(normalize-space(.), '(info:eu-repo/grantagreement/wt/.*?/)([^/]*)(/.*)?', '$2', 'i'))"/> select="concat($varWT, replace(normalize-space(.), '(info:eu-repo/grantagreement/wt/.*?/)([^/]*)(/.*)?', '$2', 'i'))"/>
</oaf:projectid> </oaf:projectid>
</xsl:when> </xsl:when>
</xsl:choose> </xsl:choose>
</xsl:for-each> </xsl:for-each>
<xsl:for-each select="//*[local-name()='relatedIdentifier']"> <xsl:for-each select="//*[local-name()='relatedIdentifier']">
<xsl:if <xsl:if test="starts-with(./text(), 'https://zenodo.org/communities/')">
test="starts-with(./text(), 'https://zenodo.org/communities/')">
<oaf:concept> <oaf:concept>
<xsl:attribute name="id"> <xsl:attribute name="id">
<xsl:value-of select="./text()"/> <xsl:value-of select="./text()"/>
@ -322,7 +293,6 @@
</oaf:concept> </oaf:concept>
</xsl:if> </xsl:if>
</xsl:for-each> </xsl:for-each>
<oaf:hostedBy> <oaf:hostedBy>
<xsl:attribute name="name"> <xsl:attribute name="name">
<xsl:value-of select="$varOfficialName"/> <xsl:value-of select="$varOfficialName"/>
@ -343,24 +313,19 @@
<xsl:copy-of select="//*[local-name() = 'about']"/> <xsl:copy-of select="//*[local-name() = 'about']"/>
</record> </record>
</xsl:template> </xsl:template>
<xsl:template match="node()|@*"> <xsl:template match="node()|@*">
<xsl:copy> <xsl:copy>
<xsl:apply-templates select="node()|@*"/> <xsl:apply-templates select="node()|@*"/>
</xsl:copy> </xsl:copy>
</xsl:template> </xsl:template>
<xsl:template match="//*[local-name() = 'metadata']//*[local-name() = 'resource']"> <xsl:template match="//*[local-name() = 'metadata']//*[local-name() = 'resource']">
<xsl:copy> <xsl:copy>
<xsl:apply-templates select="node()|@*"/> <xsl:apply-templates select="node()|@*"/>
</xsl:copy> </xsl:copy>
</xsl:template> </xsl:template>
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='alternateIdentifiers']"> <xsl:template match="//*[local-name() = 'resource']/*[local-name()='alternateIdentifiers']">
<xsl:element name="alternateIdentifiers" namespace="http://www.openarchives.org/OAI/2.0/"> <xsl:element name="alternateIdentifiers" namespace="http://www.openarchives.org/OAI/2.0/">
<xsl:copy-of select="./*"/> <xsl:copy-of select="./*"/>
<xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='Handle']"> <xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='Handle']">
<xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/"> <xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/">
<xsl:attribute name="alternateIdentifierType"> <xsl:attribute name="alternateIdentifierType">
@ -370,7 +335,6 @@
select="concat('http://hdl.handle.net/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/> select="concat('http://hdl.handle.net/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/>
</xsl:element> </xsl:element>
</xsl:if> </xsl:if>
<xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='URN']"> <xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='URN']">
<xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/"> <xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/">
<xsl:attribute name="alternateIdentifierType"> <xsl:attribute name="alternateIdentifierType">
@ -380,7 +344,6 @@
select="concat('http://nbn-resolving.org/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/> select="concat('http://nbn-resolving.org/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/>
</xsl:element> </xsl:element>
</xsl:if> </xsl:if>
<xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='DOI']"> <xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='DOI']">
<xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/"> <xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/">
<xsl:attribute name="alternateIdentifierType"> <xsl:attribute name="alternateIdentifierType">
@ -390,11 +353,8 @@
select="concat('http://dx.doi.org/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/> select="concat('http://dx.doi.org/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/>
</xsl:element> </xsl:element>
</xsl:if> </xsl:if>
</xsl:element> </xsl:element>
</xsl:template> </xsl:template>
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='identifier']"> <xsl:template match="//*[local-name() = 'resource']/*[local-name()='identifier']">
<xsl:copy-of select="."/> <xsl:copy-of select="."/>
<xsl:if test="not(//*[local-name() = 'resource']/*[local-name()='alternateIdentifiers'])"> <xsl:if test="not(//*[local-name() = 'resource']/*[local-name()='alternateIdentifiers'])">
@ -404,8 +364,7 @@
<xsl:attribute name="alternateIdentifierType"> <xsl:attribute name="alternateIdentifierType">
<xsl:value-of select="'URL'"/> <xsl:value-of select="'URL'"/>
</xsl:attribute> </xsl:attribute>
<xsl:value-of <xsl:value-of select="concat('http://hdl.handle.net/', .)"/>
select="concat('http://hdl.handle.net/', .)"/>
</xsl:element> </xsl:element>
</xsl:if> </xsl:if>
<xsl:if test=".[@identifierType='URN']"> <xsl:if test=".[@identifierType='URN']">
@ -413,8 +372,7 @@
<xsl:attribute name="alternateIdentifierType"> <xsl:attribute name="alternateIdentifierType">
<xsl:value-of select="'URL'"/> <xsl:value-of select="'URL'"/>
</xsl:attribute> </xsl:attribute>
<xsl:value-of <xsl:value-of select="concat('http://nbn-resolving.org/', .)"/>
select="concat('http://nbn-resolving.org/', .)"/>
</xsl:element> </xsl:element>
</xsl:if> </xsl:if>
<xsl:if test=".[@identifierType='DOI']"> <xsl:if test=".[@identifierType='DOI']">
@ -422,17 +380,12 @@
<xsl:attribute name="alternateIdentifierType"> <xsl:attribute name="alternateIdentifierType">
<xsl:value-of select="'URL'"/> <xsl:value-of select="'URL'"/>
</xsl:attribute> </xsl:attribute>
<xsl:value-of <xsl:value-of select="concat('http://dx.doi.org/', .)"/>
select="concat('http://dx.doi.org/', .)"/>
</xsl:element> </xsl:element>
</xsl:if> </xsl:if>
</xsl:element> </xsl:element>
</xsl:if> </xsl:if>
</xsl:template> </xsl:template>
<xsl:template match="//*[local-name() = 'header']"> <xsl:template match="//*[local-name() = 'header']">
<xsl:copy> <xsl:copy>
<xsl:apply-templates select="node()|@*"/> <xsl:apply-templates select="node()|@*"/>
@ -441,5 +394,4 @@
</xsl:element> </xsl:element>
</xsl:copy> </xsl:copy>
</xsl:template> </xsl:template>
</xsl:stylesheet> </xsl:stylesheet>