improved parallelization on transformation job
This commit is contained in:
parent
3ae67b7a1d
commit
cdfe01bbae
|
@ -11,6 +11,9 @@ import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.function.Function;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoder;
|
import org.apache.spark.sql.Encoder;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
@ -26,6 +29,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.message.MessageSender;
|
import eu.dnetlib.dhp.message.MessageSender;
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
|
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
|
||||||
|
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
@ -33,6 +37,8 @@ public class TransformSparkJobNode {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(TransformSparkJobNode.class);
|
private static final Logger log = LoggerFactory.getLogger(TransformSparkJobNode.class);
|
||||||
|
|
||||||
|
private static int RECORDS_PER_TASK = 200;
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
@ -67,6 +73,11 @@ public class TransformSparkJobNode {
|
||||||
final String dateOfTransformation = parser.get("dateOfTransformation");
|
final String dateOfTransformation = parser.get("dateOfTransformation");
|
||||||
log.info(String.format("dateOfTransformation: %s", dateOfTransformation));
|
log.info(String.format("dateOfTransformation: %s", dateOfTransformation));
|
||||||
|
|
||||||
|
final Integer rpt = Optional
|
||||||
|
.ofNullable(parser.get("recordsPerTask"))
|
||||||
|
.map(Integer::valueOf)
|
||||||
|
.orElse(RECORDS_PER_TASK);
|
||||||
|
|
||||||
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
|
||||||
final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService);
|
final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService);
|
||||||
|
@ -79,12 +90,12 @@ public class TransformSparkJobNode {
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
transformRecords(
|
transformRecords(
|
||||||
parser.getObjectMap(), isLookupService, spark, inputPath, outputBasePath);
|
parser.getObjectMap(), isLookupService, spark, inputPath, outputBasePath, rpt);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void transformRecords(final Map<String, String> args, final ISLookUpService isLookUpService,
|
public static void transformRecords(final Map<String, String> args, final ISLookUpService isLookUpService,
|
||||||
final SparkSession spark, final String inputPath, final String outputBasePath)
|
final SparkSession spark, final String inputPath, final String outputBasePath, final Integer rpt)
|
||||||
throws DnetTransformationException, IOException {
|
throws DnetTransformationException, IOException {
|
||||||
|
|
||||||
final LongAccumulator totalItems = spark.sparkContext().longAccumulator(CONTENT_TOTALITEMS);
|
final LongAccumulator totalItems = spark.sparkContext().longAccumulator(CONTENT_TOTALITEMS);
|
||||||
|
@ -99,18 +110,25 @@ public class TransformSparkJobNode {
|
||||||
final String workflowId = args.get("workflowId");
|
final String workflowId = args.get("workflowId");
|
||||||
log.info("workflowId is {}", workflowId);
|
log.info("workflowId is {}", workflowId);
|
||||||
|
|
||||||
final MessageSender messageSender = new MessageSender(dnetMessageManagerURL, workflowId);
|
MapFunction<MetadataRecord, MetadataRecord> x = TransformationFactory
|
||||||
try (AggregatorReport report = new AggregatorReport(messageSender)) {
|
.getTransformationPlugin(args, ct, isLookUpService);
|
||||||
try {
|
|
||||||
final Dataset<MetadataRecord> mdstore = spark
|
final Dataset<MetadataRecord> inputMDStore = spark
|
||||||
.read()
|
.read()
|
||||||
.format("parquet")
|
.format("parquet")
|
||||||
.load(inputPath)
|
.load(inputPath)
|
||||||
.as(encoder)
|
.as(encoder);
|
||||||
.map(
|
|
||||||
TransformationFactory.getTransformationPlugin(args, ct, isLookUpService),
|
final long totalInput = inputMDStore.count();
|
||||||
encoder);
|
|
||||||
saveDataset(mdstore, outputBasePath + MDSTORE_DATA_PATH);
|
final MessageSender messageSender = new MessageSender(dnetMessageManagerURL, workflowId);
|
||||||
|
try (AggregatorReport report = new AggregatorReport(messageSender)) {
|
||||||
|
try {
|
||||||
|
JavaRDD<MetadataRecord> mdstore = inputMDStore
|
||||||
|
.javaRDD()
|
||||||
|
.repartition(getRepartitionNumber(totalInput, rpt))
|
||||||
|
.map((Function<MetadataRecord, MetadataRecord>) x::call);
|
||||||
|
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
|
||||||
|
|
||||||
log.info("Transformed item " + ct.getProcessedItems().count());
|
log.info("Transformed item " + ct.getProcessedItems().count());
|
||||||
log.info("Total item " + ct.getTotalItems().count());
|
log.info("Total item " + ct.getTotalItems().count());
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
package eu.dnetlib.dhp.transformation.xslt;
|
package eu.dnetlib.dhp.transformation.xslt;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.Serializable;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
@ -15,7 +16,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
|
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
|
||||||
import net.sf.saxon.s9api.*;
|
import net.sf.saxon.s9api.*;
|
||||||
|
|
||||||
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord> {
|
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord>, Serializable {
|
||||||
|
|
||||||
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform";
|
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform";
|
||||||
|
|
||||||
|
@ -27,6 +28,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
||||||
|
|
||||||
private final long dateOfTransformation;
|
private final long dateOfTransformation;
|
||||||
|
|
||||||
|
private final VocabularyGroup vocabularies;
|
||||||
|
|
||||||
public XSLTTransformationFunction(
|
public XSLTTransformationFunction(
|
||||||
final AggregationCounter aggregationCounter,
|
final AggregationCounter aggregationCounter,
|
||||||
final String transformationRule,
|
final String transformationRule,
|
||||||
|
@ -35,6 +38,7 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
||||||
throws Exception {
|
throws Exception {
|
||||||
this.aggregationCounter = aggregationCounter;
|
this.aggregationCounter = aggregationCounter;
|
||||||
this.transformationRule = transformationRule;
|
this.transformationRule = transformationRule;
|
||||||
|
this.vocabularies = vocabularies;
|
||||||
this.dateOfTransformation = dateOfTransformation;
|
this.dateOfTransformation = dateOfTransformation;
|
||||||
cleanFunction = new Cleaner(vocabularies);
|
cleanFunction = new Cleaner(vocabularies);
|
||||||
}
|
}
|
||||||
|
@ -73,4 +77,24 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public AggregationCounter getAggregationCounter() {
|
||||||
|
return aggregationCounter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTransformationRule() {
|
||||||
|
return transformationRule;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Cleaner getCleanFunction() {
|
||||||
|
return cleanFunction;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getDateOfTransformation() {
|
||||||
|
return dateOfTransformation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public VocabularyGroup getVocabularies() {
|
||||||
|
return vocabularies;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,6 +37,12 @@
|
||||||
<name>dnetMessageManagerURL</name>
|
<name>dnetMessageManagerURL</name>
|
||||||
<description>The URI of the Dnet Message Manager</description>
|
<description>The URI of the Dnet Message Manager</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>recordsPerTask</name>
|
||||||
|
<value>200</value>
|
||||||
|
<description>The number of records transformed by a single Task</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<start to="BeginRead"/>
|
<start to="BeginRead"/>
|
||||||
|
@ -103,6 +109,7 @@
|
||||||
<arg>--transformationPlugin</arg><arg>${transformationPlugin}</arg>
|
<arg>--transformationPlugin</arg><arg>${transformationPlugin}</arg>
|
||||||
<arg>--transformationRuleId</arg><arg>${transformationRuleId}</arg>
|
<arg>--transformationRuleId</arg><arg>${transformationRuleId}</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
<arg>--recordsPerTask</arg><arg>${recordsPerTask}</arg>
|
||||||
<arg>--workflowId</arg><arg>${workflowId}</arg>
|
<arg>--workflowId</arg><arg>${workflowId}</arg>
|
||||||
<arg>--dnetMessageManagerURL</arg><arg>${dnetMessageManagerURL}</arg>
|
<arg>--dnetMessageManagerURL</arg><arg>${dnetMessageManagerURL}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
|
|
@ -48,6 +48,12 @@
|
||||||
"paramDescription": "the identifier of the dnet Workflow",
|
"paramDescription": "the identifier of the dnet Workflow",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"paramName": "rpt",
|
||||||
|
"paramLongName": "recordsPerTask",
|
||||||
|
"paramDescription": "the number of records transformed by a single Task",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"paramName": "tp",
|
"paramName": "tp",
|
||||||
"paramLongName": "transformationPlugin",
|
"paramLongName": "transformationPlugin",
|
||||||
|
|
|
@ -180,7 +180,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest {
|
||||||
TransformSparkJobNode
|
TransformSparkJobNode
|
||||||
.transformRecords(
|
.transformRecords(
|
||||||
parameters, isLookUpService, spark, mdStoreV2.getHdfsPath() + MDSTORE_DATA_PATH,
|
parameters, isLookUpService, spark, mdStoreV2.getHdfsPath() + MDSTORE_DATA_PATH,
|
||||||
mdStoreCleanedVersion.getHdfsPath());
|
mdStoreCleanedVersion.getHdfsPath(), 200);
|
||||||
|
|
||||||
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
||||||
final Dataset<MetadataRecord> mOutput = spark
|
final Dataset<MetadataRecord> mOutput = spark
|
||||||
|
|
|
@ -167,7 +167,8 @@ public class TransformationJobTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
|
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
|
||||||
|
|
||||||
TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output);
|
TransformSparkJobNode
|
||||||
|
.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output, 200);
|
||||||
|
|
||||||
// TODO introduce useful assertions
|
// TODO introduce useful assertions
|
||||||
|
|
||||||
|
@ -221,7 +222,8 @@ public class TransformationJobTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
|
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
|
||||||
|
|
||||||
TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output);
|
TransformSparkJobNode
|
||||||
|
.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output, 200);
|
||||||
|
|
||||||
// TODO introduce useful assertions
|
// TODO introduce useful assertions
|
||||||
|
|
||||||
|
|
|
@ -1,16 +1,13 @@
|
||||||
<xsl:stylesheet
|
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
version="2.0"
|
|
||||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
|
||||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
|
||||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
|
||||||
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
|
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
|
||||||
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
|
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
|
||||||
|
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||||
|
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||||
exclude-result-prefixes="xsl vocabulary dateCleaner">
|
exclude-result-prefixes="xsl vocabulary dateCleaner" version="2.0">
|
||||||
<xsl:param name="varOfficialName"/>
|
<xsl:param name="varOfficialName"/>
|
||||||
<xsl:param name="varDsType"/>
|
<xsl:param name="varDsType"/>
|
||||||
<xsl:param name="varDataSourceId"/>
|
<xsl:param name="varDataSourceId"/>
|
||||||
|
|
||||||
<xsl:param name="varFP7" select="'corda_______::'"/>
|
<xsl:param name="varFP7" select="'corda_______::'"/>
|
||||||
<xsl:param name="varH2020" select="'corda__h2020::'"/>
|
<xsl:param name="varH2020" select="'corda__h2020::'"/>
|
||||||
<xsl:param name="varAKA" select="'aka_________::'"/>
|
<xsl:param name="varAKA" select="'aka_________::'"/>
|
||||||
|
@ -34,29 +31,22 @@
|
||||||
<xsl:param name="varTARA" select="'taraexp_____::'"/><!-- TARA awaiting DOI from André -->
|
<xsl:param name="varTARA" select="'taraexp_____::'"/><!-- TARA awaiting DOI from André -->
|
||||||
<xsl:param name="varTUBITAK" select="'tubitakf____::'"/>
|
<xsl:param name="varTUBITAK" select="'tubitakf____::'"/>
|
||||||
<xsl:param name="varWT" select="'wt__________::'"/>
|
<xsl:param name="varWT" select="'wt__________::'"/>
|
||||||
|
|
||||||
<xsl:param name="index" select="0"/>
|
<xsl:param name="index" select="0"/>
|
||||||
<xsl:param name="transDate" select="current-dateTime()"/>
|
<xsl:param name="transDate" select="current-dateTime()"/>
|
||||||
|
|
||||||
|
|
||||||
<xsl:template match="/">
|
<xsl:template match="/">
|
||||||
<xsl:variable name="datasourcePrefix" select="normalize-space(//oaf:datasourceprefix)"/>
|
<xsl:variable name="datasourcePrefix" select="normalize-space(//oaf:datasourceprefix)"/>
|
||||||
<xsl:call-template name="validRecord"/>
|
<xsl:call-template name="validRecord"/>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
|
|
||||||
<xsl:template name="validRecord">
|
<xsl:template name="validRecord">
|
||||||
<record>
|
<record>
|
||||||
<xsl:apply-templates select="//*[local-name() = 'header']"/>
|
<xsl:apply-templates select="//*[local-name() = 'header']"/>
|
||||||
|
|
||||||
<metadata>
|
<metadata>
|
||||||
<xsl:apply-templates select="//*[local-name() = 'metadata']//*[local-name() = 'resource']"/>
|
<xsl:apply-templates select="//*[local-name() = 'metadata']//*[local-name() = 'resource']"/>
|
||||||
|
|
||||||
<xsl:if test="//*[local-name()='date']/@dateType='Available'">
|
<xsl:if test="//*[local-name()='date']/@dateType='Available'">
|
||||||
<xsl:variable name='varEmbargoEndDate'
|
<xsl:variable name="varEmbargoEndDate"
|
||||||
select="dateCleaner:dateISO(normalize-space(//*[local-name()='date'][@dateType='Available']))"/>
|
select="dateCleaner:dateISO(normalize-space(//*[local-name()='date'][@dateType='Available']))"/>
|
||||||
<xsl:choose>
|
<xsl:choose>
|
||||||
<xsl:when test="string-length($varEmbargoEndDate) > 0">
|
<xsl:when test="string-length($varEmbargoEndDate) > 0">
|
||||||
<oaf:embargoenddate>
|
<oaf:embargoenddate>
|
||||||
<xsl:value-of select="$varEmbargoEndDate"/>
|
<xsl:value-of select="$varEmbargoEndDate"/>
|
||||||
</oaf:embargoenddate>
|
</oaf:embargoenddate>
|
||||||
|
@ -68,55 +58,44 @@
|
||||||
</xsl:otherwise>
|
</xsl:otherwise>
|
||||||
</xsl:choose>
|
</xsl:choose>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
|
|
||||||
<dr:CobjCategory>
|
<dr:CobjCategory>
|
||||||
<xsl:variable name="varCobjCategory"
|
<xsl:variable name="varCobjCategory"
|
||||||
select="vocabulary:clean( //*[local-name()='resourceType']/@resourceTypeGeneral, 'dnet:publication_resource')"/>
|
select="vocabulary:clean( //*[local-name()='resourceType']/@resourceTypeGeneral, 'dnet:publication_resource')"/>
|
||||||
<xsl:variable name="varSuperType"
|
<xsl:variable name="varSuperType"
|
||||||
select="vocabulary:clean( $varCobjCategory, 'dnet:result_typologies')"/>
|
select="vocabulary:clean( $varCobjCategory, 'dnet:result_typologies')"/>
|
||||||
|
|
||||||
<xsl:attribute name="type">
|
<xsl:attribute name="type">
|
||||||
<xsl:value-of select="$varSuperType"/>
|
<xsl:value-of select="$varSuperType"/>
|
||||||
</xsl:attribute>
|
</xsl:attribute>
|
||||||
<xsl:value-of select="$varCobjCategory"/>
|
<xsl:value-of select="$varCobjCategory"/>
|
||||||
</dr:CobjCategory>
|
</dr:CobjCategory><!-- review status --><!-- Zenodo -->
|
||||||
|
<xsl:variable name="varRefereedConvt"
|
||||||
<!-- review status -->
|
select="for $i in (//*[local-name()='resourceType']/(., @resourceTypeGeneral), //*[local-name()='version']) return vocabulary:clean(normalize-space($i), 'dnet:review_levels')"/>
|
||||||
<!-- Zenodo -->
|
|
||||||
<xsl:variable name="varRefereedConvt" select="for $i in (//*[local-name()='resourceType']/(., @resourceTypeGeneral), //*[local-name()='version'])
|
|
||||||
return vocabulary:clean(normalize-space($i), 'dnet:review_levels')"/>
|
|
||||||
<xsl:variable name="varRefereedIdntf"
|
<xsl:variable name="varRefereedIdntf"
|
||||||
select="//*[local-name()=('identifier', 'alternateIdentifier')][matches(lower-case(.), '.*([\s\-\.\\_/:]|%[2-7][0-9A-F])pre([\s\-\.\\_/:]|%[2-7][0-9A-F])?prints?([\s\-\.\\_/:%].*|$)')]/'0002' "/>
|
select="//*[local-name()=('identifier', 'alternateIdentifier')][matches(lower-case(.), '.*([\s\-\.\\_/:]|%[2-7][0-9A-F])pre([\s\-\.\\_/:]|%[2-7][0-9A-F])?prints?([\s\-\.\\_/:%].*|$)')]/'0002' "/>
|
||||||
<xsl:variable name="varRefereedReltn"
|
<xsl:variable name="varRefereedReltn"
|
||||||
select="//*[local-name()='relatedIdentifier'][./@relationType/lower-case(.)='isreviewedby']/'0001' "/>
|
select="//*[local-name()='relatedIdentifier'][./@relationType/lower-case(.)='isreviewedby']/'0001' "/>
|
||||||
<xsl:variable name="varRefereedVersn" select="(//*[local-name()='version'][matches(lower-case(.), '.*peer[\s\-\.\\_/:%]?reviewed.*')]/'0001',
|
<xsl:variable name="varRefereedVersn"
|
||||||
//*[local-name()='version'][matches(normalize-space(lower-case(.)), '^(v|vs|version|rel|release)?[\s\.\-_]*0$')]/'0002',
|
select="(//*[local-name()='version'][matches(lower-case(.), '.*peer[\s\-\.\\_/:%]?reviewed.*')]/'0001', //*[local-name()='version'][matches(normalize-space(lower-case(.)), '^(v|vs|version|rel|release)?[\s\.\-_]*0$')]/'0002', //*[local-name()='version'][matches(lower-case(.), '(^|[\s\-\.\\_/:%].*)(beta|draft|trial|test)([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='version'][matches(lower-case(.), '.*submi(tted|ssion|ttal).*')]/'0002') "/>
|
||||||
//*[local-name()='version'][matches(lower-case(.), '(^|[\s\-\.\\_/:%].*)(beta|draft|trial|test)([\s\-\.\\_/:%].*|$)')]/'0002',
|
<xsl:variable name="varRefereedOther"
|
||||||
//*[local-name()='version'][matches(lower-case(.), '.*submi(tted|ssion|ttal).*')]/'0002') "/>
|
select="(//*[local-name()='publisher'][matches(lower-case(.), '.*[\s\-\.\\_/:%]pre[\s\-\.\\_/:%]?prints?([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='description'][matches(lower-case(.), '^peer[\s\-\.\\_/:%]?reviewed$')]/'0001', //*[local-name()='description'][matches(lower-case(.), '^pre[\s\-\.\\_/:%]?prints?$')]/'0002') "/>
|
||||||
<xsl:variable name="varRefereedOther" select="(//*[local-name()='publisher'][matches(lower-case(.), '.*[\s\-\.\\_/:%]pre[\s\-\.\\_/:%]?prints?([\s\-\.\\_/:%].*|$)')]/'0002',
|
|
||||||
//*[local-name()='description'][matches(lower-case(.), '^peer[\s\-\.\\_/:%]?reviewed$')]/'0001',
|
|
||||||
//*[local-name()='description'][matches(lower-case(.), '^pre[\s\-\.\\_/:%]?prints?$')]/'0002') "/>
|
|
||||||
<xsl:variable name="varRefereed"
|
<xsl:variable name="varRefereed"
|
||||||
select="($varRefereedConvt, $varRefereedIdntf, $varRefereedReltn, $varRefereedVersn, $varRefereedOther)"/>
|
select="($varRefereedConvt, $varRefereedIdntf, $varRefereedReltn, $varRefereedVersn, $varRefereedOther)"/>
|
||||||
<xsl:choose>
|
<xsl:choose>
|
||||||
<xsl:when test="count($varRefereed[. = '0001']) > 0">
|
<xsl:when test="count($varRefereed[. = '0001']) > 0">
|
||||||
<oaf:refereed>
|
<oaf:refereed>
|
||||||
<xsl:value-of select="'0001'"/>
|
<xsl:value-of select="'0001'"/>
|
||||||
</oaf:refereed>
|
</oaf:refereed>
|
||||||
</xsl:when>
|
</xsl:when>
|
||||||
<xsl:when test="count($varRefereed[. = '0002']) > 0">
|
<xsl:when test="count($varRefereed[. = '0002']) > 0">
|
||||||
<oaf:refereed>
|
<oaf:refereed>
|
||||||
<xsl:value-of select="'0002'"/>
|
<xsl:value-of select="'0002'"/>
|
||||||
</oaf:refereed>
|
</oaf:refereed>
|
||||||
</xsl:when>
|
</xsl:when>
|
||||||
</xsl:choose>
|
</xsl:choose>
|
||||||
|
|
||||||
<oaf:dateAccepted>
|
<oaf:dateAccepted>
|
||||||
<xsl:value-of
|
<xsl:value-of select="dateCleaner:dateISO(normalize-space(//*[local-name()='publicationYear']))"/>
|
||||||
select="dateCleaner:dateISO(normalize-space(//*[local-name()='publicationYear']))"/>
|
|
||||||
</oaf:dateAccepted>
|
</oaf:dateAccepted>
|
||||||
<xsl:choose>
|
<xsl:choose>
|
||||||
|
|
||||||
<xsl:when
|
<xsl:when
|
||||||
test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'info:eu-repo/semantics')]">
|
test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'info:eu-repo/semantics')]">
|
||||||
<oaf:accessrights>
|
<oaf:accessrights>
|
||||||
|
@ -147,17 +126,12 @@
|
||||||
</xsl:choose>
|
</xsl:choose>
|
||||||
</xsl:otherwise>
|
</xsl:otherwise>
|
||||||
</xsl:choose>
|
</xsl:choose>
|
||||||
|
|
||||||
<oaf:language>
|
<oaf:language>
|
||||||
<xsl:value-of
|
<xsl:value-of select="vocabulary:clean(//*[local-name()='language'], 'dnet:languages')"/>
|
||||||
select="vocabulary:clean(//*[local-name()='language'], 'dnet:languages')"/>
|
|
||||||
</oaf:language>
|
</oaf:language>
|
||||||
|
|
||||||
|
|
||||||
<xsl:for-each
|
<xsl:for-each
|
||||||
select="//*[local-name()='nameIdentifier'][contains(., 'info:eu-repo/grantAgreement/')], //*[local-name()='fundingReference']/*[local-name()='awardNumber']">
|
select="//*[local-name()='nameIdentifier'][contains(., 'info:eu-repo/grantAgreement/')], //*[local-name()='fundingReference']/*[local-name()='awardNumber']">
|
||||||
<xsl:choose>
|
<xsl:choose>
|
||||||
|
|
||||||
<xsl:when
|
<xsl:when
|
||||||
test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i') or ../*[local-name() = 'funderIdentifier' and . = '10.13039/100011102']">
|
test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i') or ../*[local-name() = 'funderIdentifier' and . = '10.13039/100011102']">
|
||||||
<oaf:projectid>
|
<oaf:projectid>
|
||||||
|
@ -308,13 +282,10 @@
|
||||||
select="concat($varWT, replace(normalize-space(.), '(info:eu-repo/grantagreement/wt/.*?/)([^/]*)(/.*)?', '$2', 'i'))"/>
|
select="concat($varWT, replace(normalize-space(.), '(info:eu-repo/grantagreement/wt/.*?/)([^/]*)(/.*)?', '$2', 'i'))"/>
|
||||||
</oaf:projectid>
|
</oaf:projectid>
|
||||||
</xsl:when>
|
</xsl:when>
|
||||||
|
|
||||||
</xsl:choose>
|
</xsl:choose>
|
||||||
</xsl:for-each>
|
</xsl:for-each>
|
||||||
|
|
||||||
<xsl:for-each select="//*[local-name()='relatedIdentifier']">
|
<xsl:for-each select="//*[local-name()='relatedIdentifier']">
|
||||||
<xsl:if
|
<xsl:if test="starts-with(./text(), 'https://zenodo.org/communities/')">
|
||||||
test="starts-with(./text(), 'https://zenodo.org/communities/')">
|
|
||||||
<oaf:concept>
|
<oaf:concept>
|
||||||
<xsl:attribute name="id">
|
<xsl:attribute name="id">
|
||||||
<xsl:value-of select="./text()"/>
|
<xsl:value-of select="./text()"/>
|
||||||
|
@ -322,7 +293,6 @@
|
||||||
</oaf:concept>
|
</oaf:concept>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
</xsl:for-each>
|
</xsl:for-each>
|
||||||
|
|
||||||
<oaf:hostedBy>
|
<oaf:hostedBy>
|
||||||
<xsl:attribute name="name">
|
<xsl:attribute name="name">
|
||||||
<xsl:value-of select="$varOfficialName"/>
|
<xsl:value-of select="$varOfficialName"/>
|
||||||
|
@ -343,24 +313,19 @@
|
||||||
<xsl:copy-of select="//*[local-name() = 'about']"/>
|
<xsl:copy-of select="//*[local-name() = 'about']"/>
|
||||||
</record>
|
</record>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="node()|@*">
|
<xsl:template match="node()|@*">
|
||||||
<xsl:copy>
|
<xsl:copy>
|
||||||
<xsl:apply-templates select="node()|@*"/>
|
<xsl:apply-templates select="node()|@*"/>
|
||||||
</xsl:copy>
|
</xsl:copy>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="//*[local-name() = 'metadata']//*[local-name() = 'resource']">
|
<xsl:template match="//*[local-name() = 'metadata']//*[local-name() = 'resource']">
|
||||||
<xsl:copy>
|
<xsl:copy>
|
||||||
<xsl:apply-templates select="node()|@*"/>
|
<xsl:apply-templates select="node()|@*"/>
|
||||||
</xsl:copy>
|
</xsl:copy>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='alternateIdentifiers']">
|
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='alternateIdentifiers']">
|
||||||
<xsl:element name="alternateIdentifiers" namespace="http://www.openarchives.org/OAI/2.0/">
|
<xsl:element name="alternateIdentifiers" namespace="http://www.openarchives.org/OAI/2.0/">
|
||||||
|
|
||||||
<xsl:copy-of select="./*"/>
|
<xsl:copy-of select="./*"/>
|
||||||
|
|
||||||
<xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='Handle']">
|
<xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='Handle']">
|
||||||
<xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/">
|
<xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/">
|
||||||
<xsl:attribute name="alternateIdentifierType">
|
<xsl:attribute name="alternateIdentifierType">
|
||||||
|
@ -370,7 +335,6 @@
|
||||||
select="concat('http://hdl.handle.net/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/>
|
select="concat('http://hdl.handle.net/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
|
|
||||||
<xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='URN']">
|
<xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='URN']">
|
||||||
<xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/">
|
<xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/">
|
||||||
<xsl:attribute name="alternateIdentifierType">
|
<xsl:attribute name="alternateIdentifierType">
|
||||||
|
@ -380,7 +344,6 @@
|
||||||
select="concat('http://nbn-resolving.org/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/>
|
select="concat('http://nbn-resolving.org/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
|
|
||||||
<xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='DOI']">
|
<xsl:if test="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='DOI']">
|
||||||
<xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/">
|
<xsl:element name="alternateIdentifier" namespace="http://www.openarchives.org/OAI/2.0/">
|
||||||
<xsl:attribute name="alternateIdentifierType">
|
<xsl:attribute name="alternateIdentifierType">
|
||||||
|
@ -390,11 +353,8 @@
|
||||||
select="concat('http://dx.doi.org/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/>
|
select="concat('http://dx.doi.org/', //*[local-name() = 'resource']/*[local-name()='identifier'])"/>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
|
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
|
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='identifier']">
|
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='identifier']">
|
||||||
<xsl:copy-of select="."/>
|
<xsl:copy-of select="."/>
|
||||||
<xsl:if test="not(//*[local-name() = 'resource']/*[local-name()='alternateIdentifiers'])">
|
<xsl:if test="not(//*[local-name() = 'resource']/*[local-name()='alternateIdentifiers'])">
|
||||||
|
@ -404,8 +364,7 @@
|
||||||
<xsl:attribute name="alternateIdentifierType">
|
<xsl:attribute name="alternateIdentifierType">
|
||||||
<xsl:value-of select="'URL'"/>
|
<xsl:value-of select="'URL'"/>
|
||||||
</xsl:attribute>
|
</xsl:attribute>
|
||||||
<xsl:value-of
|
<xsl:value-of select="concat('http://hdl.handle.net/', .)"/>
|
||||||
select="concat('http://hdl.handle.net/', .)"/>
|
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
<xsl:if test=".[@identifierType='URN']">
|
<xsl:if test=".[@identifierType='URN']">
|
||||||
|
@ -413,8 +372,7 @@
|
||||||
<xsl:attribute name="alternateIdentifierType">
|
<xsl:attribute name="alternateIdentifierType">
|
||||||
<xsl:value-of select="'URL'"/>
|
<xsl:value-of select="'URL'"/>
|
||||||
</xsl:attribute>
|
</xsl:attribute>
|
||||||
<xsl:value-of
|
<xsl:value-of select="concat('http://nbn-resolving.org/', .)"/>
|
||||||
select="concat('http://nbn-resolving.org/', .)"/>
|
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
<xsl:if test=".[@identifierType='DOI']">
|
<xsl:if test=".[@identifierType='DOI']">
|
||||||
|
@ -422,17 +380,12 @@
|
||||||
<xsl:attribute name="alternateIdentifierType">
|
<xsl:attribute name="alternateIdentifierType">
|
||||||
<xsl:value-of select="'URL'"/>
|
<xsl:value-of select="'URL'"/>
|
||||||
</xsl:attribute>
|
</xsl:attribute>
|
||||||
<xsl:value-of
|
<xsl:value-of select="concat('http://dx.doi.org/', .)"/>
|
||||||
select="concat('http://dx.doi.org/', .)"/>
|
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
|
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
|
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
|
|
||||||
<xsl:template match="//*[local-name() = 'header']">
|
<xsl:template match="//*[local-name() = 'header']">
|
||||||
<xsl:copy>
|
<xsl:copy>
|
||||||
<xsl:apply-templates select="node()|@*"/>
|
<xsl:apply-templates select="node()|@*"/>
|
||||||
|
@ -441,5 +394,4 @@
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:copy>
|
</xsl:copy>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
</xsl:stylesheet>
|
</xsl:stylesheet>
|
Loading…
Reference in New Issue