From 6ff234d81bd4cdec1c1c1745b24a7c3901e90afb Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 1 Feb 2021 13:56:05 +0100 Subject: [PATCH] Implemented a first prototype of incremental harvesting and trasformation using readlock --- dhp-common/pom.xml | 6 ++ .../common/AggregationUtility.java | 28 ++++++ .../GenerateNativeStoreSparkJob.java | 97 +++++++++++++++---- .../transformation/TransformSparkJobNode.java | 39 ++++---- .../transformation/TransformationFactory.java | 6 +- .../collection_input_parameters.json | 6 ++ .../collection/oozie_app/config-default.xml | 4 + .../dhp/collection/oozie_app/workflow.xml | 33 ++++++- .../oozie_app/config-default.xml | 4 + .../dhp/transformation/oozie_app/workflow.xml | 96 +++++++++++++++--- .../transformation_input_parameters.json | 10 +- .../eu/dnetlib/dhp/transform/ext_simple.xsl | 4 +- .../eu/dnetlib/dhp/transform/input.xml | 96 ++++++------------ 13 files changed, 297 insertions(+), 132 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index b295bc1f1..6eb2e0358 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -98,6 +98,12 @@ dnet-pace-core + + org.apache.httpcomponents + httpclient + + + eu.dnetlib.dhp dhp-schemas diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java new file mode 100644 index 000000000..1f5ed27cb --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java @@ -0,0 +1,28 @@ + +package eu.dnetlib.dhp.aggregation.common; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.spark.sql.SparkSession; + +public class AggregationUtility { + + public static void writeTotalSizeOnHDFS(final SparkSession spark, final Long total, final String path) + throws IOException { + + FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration()); + + FSDataOutputStream output = fs.create(new Path(path)); + + final BufferedOutputStream os = new BufferedOutputStream(output); + + os.write(total.toString().getBytes(StandardCharsets.UTF_8)); + + os.close(); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index b28327a40..466ddcd21 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -5,9 +5,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.*; import java.nio.charset.StandardCharsets; +import java.util.Collections; import java.util.Objects; import java.util.Optional; -import java.util.Properties; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -20,10 +20,9 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoder; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.apache.spark.sql.expressions.Aggregator; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; import org.dom4j.Node; @@ -34,19 +33,62 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; -import eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode; +import eu.dnetlib.dhp.aggregation.common.AggregationUtility; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.collection.worker.CollectorWorkerApplication; -import eu.dnetlib.dhp.common.rest.DNetRestClient; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.Provenance; -import eu.dnetlib.message.MessageManager; +import scala.Tuple2; public class GenerateNativeStoreSparkJob { private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class); private static final String DATASET_NAME = "/store"; + public static class MDStoreAggregator extends Aggregator { + + @Override + public MetadataRecord zero() { + return new MetadataRecord(); + } + + @Override + public MetadataRecord reduce(MetadataRecord b, MetadataRecord a) { + + return getLatestRecord(b, a); + } + + private MetadataRecord getLatestRecord(MetadataRecord b, MetadataRecord a) { + if (b == null) + return a; + + if (a == null) + return b; + return (a.getDateOfCollection() > b.getDateOfCollection()) ? a : b; + } + + @Override + public MetadataRecord merge(MetadataRecord b, MetadataRecord a) { + return getLatestRecord(b, a); + } + + @Override + public MetadataRecord finish(MetadataRecord j) { + return j; + } + + @Override + public Encoder bufferEncoder() { + return Encoders.kryo(MetadataRecord.class); + } + + @Override + public Encoder outputEncoder() { + return Encoders.kryo(MetadataRecord.class); + } + + } + public static void main(String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -70,6 +112,12 @@ public class GenerateNativeStoreSparkJob { final MDStoreVersion currentVersion = jsonMapper.readValue(mdStoreVersion, MDStoreVersion.class); + String readMdStoreVersionParam = parser.get("readMdStoreVersion"); + log.info("readMdStoreVersion is {}", readMdStoreVersionParam); + + final MDStoreVersion readMdStoreVersion = StringUtils.isBlank(readMdStoreVersionParam) ? null + : jsonMapper.readValue(readMdStoreVersionParam, MDStoreVersion.class); + Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) @@ -77,6 +125,9 @@ public class GenerateNativeStoreSparkJob { log.info("isSparkSessionManaged: {}", isSparkSessionManaged); SparkConf conf = new SparkConf(); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(Collections.singleton(MetadataRecord.class).toArray(new Class[] {})); + runWithSparkSession( conf, isSparkSessionManaged, @@ -105,8 +156,27 @@ public class GenerateNativeStoreSparkJob { .distinct(); final Encoder encoder = Encoders.bean(MetadataRecord.class); + Dataset mdstore = spark.createDataset(nativeStore.rdd(), encoder); + if (readMdStoreVersion != null) { + // INCREMENTAL MODE + + Dataset currentMdStoreVersion = spark + .read() + .load(readMdStoreVersion.getHdfsPath() + DATASET_NAME) + .as(encoder); + TypedColumn aggregator = new MDStoreAggregator().toColumn(); + + mdstore = currentMdStoreVersion + .union(mdstore) + .groupByKey( + (MapFunction) MetadataRecord::getId, + Encoders.STRING()) + .agg(aggregator) + .map((MapFunction, MetadataRecord>) Tuple2::_2, encoder); + + } mdstore .write() .mode(SaveMode.Overwrite) @@ -116,17 +186,8 @@ public class GenerateNativeStoreSparkJob { final Long total = mdstore.count(); - FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration()); - - FSDataOutputStream output = fs.create(new Path(currentVersion.getHdfsPath() + "/size")); - - final BufferedOutputStream os = new BufferedOutputStream(output); - - os.write(total.toString().getBytes(StandardCharsets.UTF_8)); - - os.close(); + AggregationUtility.writeTotalSizeOnHDFS(spark, total, currentVersion.getHdfsPath() + "/size"); }); - } public static MetadataRecord parseRecord( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index c6ed5a1e3..b9df902a1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -3,14 +3,11 @@ package eu.dnetlib.dhp.transformation; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.ByteArrayInputStream; -import java.util.HashMap; +import java.io.IOException; import java.util.Map; -import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -18,25 +15,18 @@ import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.apache.spark.util.LongAccumulator; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Node; -import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; +import eu.dnetlib.dhp.aggregation.common.AggregationUtility; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; -import eu.dnetlib.dhp.transformation.vocabulary.VocabularyHelper; -import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; -import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.message.Message; -import eu.dnetlib.message.MessageManager; -import eu.dnetlib.message.MessageType; public class TransformSparkJobNode { @@ -59,10 +49,14 @@ public class TransformSparkJobNode { .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String inputPath = parser.get("mdstoreInputPath"); - final String outputPath = parser.get("mdstoreOutputPath"); + final String mdstoreInputVersion = parser.get("mdstoreInputVersion"); + final String mdstoreOutputVersion = parser.get("mdstoreOutputVersion"); // TODO this variable will be used after implementing Messaging with DNet Aggregator + final ObjectMapper jsonMapper = new ObjectMapper(); + final MDStoreVersion nativeMdStoreVersion = jsonMapper.readValue(mdstoreInputVersion, MDStoreVersion.class); + final MDStoreVersion cleanedMdStoreVersion = jsonMapper.readValue(mdstoreOutputVersion, MDStoreVersion.class); + final String isLookupUrl = parser.get("isLookupUrl"); log.info(String.format("isLookupUrl: %s", isLookupUrl)); @@ -72,11 +66,14 @@ public class TransformSparkJobNode { runWithSparkSession( conf, isSparkSessionManaged, - spark -> transformRecords(parser.getObjectMap(), isLookupService, spark, inputPath, outputPath)); + spark -> transformRecords( + parser.getObjectMap(), isLookupService, spark, nativeMdStoreVersion.getHdfsPath(), + cleanedMdStoreVersion.getHdfsPath())); } public static void transformRecords(final Map args, final ISLookUpService isLookUpService, - final SparkSession spark, final String inputPath, final String outputPath) throws DnetTransformationException { + final SparkSession spark, final String inputPath, final String outputPath) + throws DnetTransformationException, IOException { final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems"); final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems"); @@ -86,11 +83,13 @@ public class TransformSparkJobNode { final Dataset mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder); final MapFunction XSLTTransformationFunction = TransformationFactory .getTransformationPlugin(args, ct, isLookUpService); - mdstoreInput.map(XSLTTransformationFunction, encoder).write().save(outputPath); + mdstoreInput.map(XSLTTransformationFunction, encoder).write().save(outputPath + "/store"); log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Total item " + ct.getTotalItems().count()); log.info("Transformation Error item " + ct.getErrorItems().count()); + + AggregationUtility.writeTotalSizeOnHDFS(spark, ct.getProcessedItems().count(), outputPath + "/size"); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index fbaef1d1f..d1f896964 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -30,13 +30,13 @@ public class TransformationFactory { log.info("Transformation plugin required " + transformationPlugin); switch (transformationPlugin) { case "XSLT_TRANSFORM": { - final String transformationRuleName = jobArgument.get("transformationRuleTitle"); - if (StringUtils.isBlank(transformationRuleName)) + final String transformationRuleId = jobArgument.get("transformationRuleId"); + if (StringUtils.isBlank(transformationRuleId)) throw new DnetTransformationException("Missing Parameter transformationRule"); final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService); final String transformationRule = queryTransformationRuleFromIS( - transformationRuleName, isLookupService); + transformationRuleId, isLookupService); final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json index c1aa03bcd..987f004bb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json @@ -35,6 +35,12 @@ "paramDescription": "the Metadata Store Version Info", "paramRequired": true }, + { + "paramName": "rmv", + "paramLongName": "readMdStoreVersion", + "paramDescription": "the Read Lock Metadata Store Version bean", + "paramRequired": false + }, { "paramName": "w", "paramLongName": "workflowId", diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/config-default.xml index 2e0ed9aee..e77dd09c9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/config-default.xml @@ -15,4 +15,8 @@ oozie.action.sharelib.for.spark spark2 + + oozie.launcher.mapreduce.user.classpath.first + true + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml index 527ec1727..9c213bee5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml @@ -51,7 +51,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -61,7 +61,7 @@ ${wf:conf('collectionMode') eq 'REFRESH'} ${wf:conf('collectionMode') eq 'INCREMENTAL'} - + @@ -99,7 +99,7 @@ --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} - + @@ -123,9 +123,10 @@ --provenance${dataSourceInfo} --xpath${identifierPath} --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --readMdStoreVersion${wf:actionData('BeginRead')['mdStoreReadLockVersion']} - + @@ -133,7 +134,7 @@ ${wf:conf('collectionMode') eq 'REFRESH'} ${wf:conf('collectionMode') eq 'INCREMENTAL'} - + @@ -161,6 +162,28 @@ + + + ${wf:conf('collectionMode') eq 'REFRESH'} + ${wf:conf('collectionMode') eq 'INCREMENTAL'} + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_UNLOCK + --mdStoreManagerURI${mdStoreManagerURI} + --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml index 2e0ed9aee..e77dd09c9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml @@ -15,4 +15,8 @@ oozie.action.sharelib.for.spark spark2 + + oozie.launcher.mapreduce.user.classpath.first + true + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml index b36bc3766..aff87dc79 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml @@ -1,25 +1,25 @@ - mdstoreInputPath - the path of the native MDStore + mdStoreInputId + the identifier of the native MDStore - - mdstoreOutputPath + mdStoreOutputId + the identifier of the cleaned MDStore + + + mdStoreManagerURI the path of the cleaned mdstore - - transformationRuleTitle + transformationRuleId The transformation Rule to apply - transformationPlugin The transformation Plugin - dateOfTransformation The timestamp of the transformation date @@ -28,11 +28,34 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_LOCK + --mdStoreID${mdStoreInputId} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionNEW_VERSION + --mdStoreID${mdStoreOutputId} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + yarn @@ -49,18 +72,63 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --mdstoreInputPath${mdstoreInputPath} - --mdstoreOutputPath${mdstoreOutputPath} + --mdstoreInputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdstoreOutputVersion${wf:actionData('BeginRead')['mdStoreReadLockVersion']} --dateOfTransformation${dateOfTransformation} --transformationPlugin${transformationPlugin} - --transformationRuleTitle${transformationRuleTitle} - - + --transformationRuleId${transformationRuleId} + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_UNLOCK + --mdStoreManagerURI${mdStoreManagerURI} + --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} + + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionCOMMIT + --namenode${nameNode} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_UNLOCK + --mdStoreManagerURI${mdStoreManagerURI} + --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} + + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionROLLBACK + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json index cbd2f25ab..d92698de5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json @@ -13,19 +13,19 @@ }, { "paramName": "i", - "paramLongName": "mdstoreInputPath", - "paramDescription": "the path of the sequencial file to read", + "paramLongName": "mdstoreInputVersion", + "paramDescription": "the mdStore Version bean of the Input", "paramRequired": true }, { "paramName": "o", - "paramLongName": "mdstoreOutputPath", - "paramDescription": "the path of the result DataFrame on HDFS", + "paramLongName": "mdstoreOutputVersion", + "paramDescription": "the mdStore Version bean of the Output", "paramRequired": true }, { "paramName": "tr", - "paramLongName": "transformationRuleTitle", + "paramLongName": "transformationRuleId", "paramDescription": "the transformation Rule to apply to the input MDStore", "paramRequired": true }, diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl index 9e5f84c11..becd3a05e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl @@ -9,7 +9,9 @@ - + + + diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml index 8efb3c487..ebe8e919b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml @@ -1,68 +1,32 @@ - - - - od______2294::00029b7f0a2a7e090e55b625a9079d83 - oai:pub.uni-bielefeld.de:2578942 - 2018-11-23T15:15:33.974+01:00 - od______2294 - oai:pub.uni-bielefeld.de:2578942 - 2018-07-24T13:01:16Z - conference - ddc:000 - conferenceFtxt - driver - open_access - - - - Mobile recommendation agents making online use of visual attention information at the point of sale - Pfeiffer, Thies - Pfeiffer, Jella - Meißner, Martin - Davis, Fred - Riedl, René - Jan, vom Brocke - Léger, Pierre-Majorique - Randolph, Adriane - Mobile Cognitive Assistance Systems - Information Systems - ddc:000 - We aim to utilize online information about visual attention for developing mobile recommendation agents (RAs) for use at the point of sale. Up to now, most RAs are focussed exclusively at personalization in an e-commerce setting. Very little is known, however, about mobile RAs that offer information and assistance at the point of sale based on individual-level feature based preference models (Murray and Häubl 2009). Current attempts provide information about products at the point of sale by manually scanning barcodes or using RFID (Kowatsch et al. 2011, Heijden 2005), e.g. using specific apps for smartphones. We argue that an online access to the current visual attention of the user offers a much larger potential. Integrating mobile eye tracking into ordinary glasses would yield a direct benefit of applying neuroscience methods in the user’s everyday life. First, learning from consumers’ attentional processes over time and adapting recommendations based on this learning allows us to provide very accurate and relevant recommendations, potentially increasing the perceived usefulness. Second, our proposed system needs little explicit user input (no scanning or navigation on screen) making it easy to use. Thus, instead of learning from click behaviour and past customer ratings, as it is the case in the e-commerce setting, the mobile RA learns from eye movements by participating online in every day decision processes. We argue that mobile RAs should be built based on current research in human judgment and decision making (Murray et al. 2010). In our project, we therefore follow a two-step approach: In the empirical basic research stream, we aim to understand the user’s interaction with the product shelf: the actions and patterns of user’s behaviour (eye movements, gestures, approaching a product closer) and their correspondence to the user’s informational needs. In the empirical system development stream, we create prototypes of mobile RAs and test experimentally the factors that influence the user’s adoption. For example, we suggest that a user’s involvement in the process, such as a need for exact nutritional information or for assistance (e.g., reading support for elderly) will influence the user’s intention to use such as system. The experiments are conducted both in our immersive virtual reality supermarket presented in a CAVE, where we can also easily display information to the user and track the eye movement in great accuracy, as well as in real-world supermarkets (see Figure 1), so that the findings can be better generalized to natural decision situations (Gidlöf et al. 2013). In a first pilot study with five randomly chosen participants in a supermarket, we evaluated which sort of mobile RAs consumers favour in order to get a first impression of the user’s acceptance of the technology. Figure 1 shows an excerpt of one consumer’s eye movements during a decision process. First results show long eye cascades and short fixations on many products in situations where users are uncertain and in need for support. Furthermore, we find a surprising acceptance of the technology itself throughout all ages (23 – 61 years). At the same time, consumers express serious fear of being manipulated by such a technology. For that reason, they strongly prefer the information to be provided by trusted third party or shared with family members and friends (see also Murray and Häubl 2009). Our pilot will be followed by a larger field experiment in March in order to learn more about factors that influence the user’s acceptance as well as the eye movement patterns that reflect typical phases of decision processes and indicate the need for support by a RA. - 2013 - info:eu-repo/semantics/conferenceObject - doc-type:conferenceObject - text - https://pub.uni-bielefeld.de/record/2578942 - https://pub.uni-bielefeld.de/download/2578942/2602478 - Pfeiffer T, Pfeiffer J, Meißner M. Mobile recommendation agents making online use of visual attention information at the point of sale. In: Davis F, Riedl R, Jan vom B, Léger P-M, Randolph A, eds. Proceedings of the Gmunden Retreat on NeuroIS 2013. 2013: 3-3. - eng - info:eu-repo/semantics/openAccess + +
+ oai:lib.psnc.pl:278 + 2011-08-25T15:17:13Z + PSNCRepository:PSNCExternalRepository:exhibitions + PSNCRepository:PSNCExternalRepository:Departments + PSNCRepository:PSNCExternalRepository:Departments:NetworkServices + PSNCRepository:PSNCExternalRepository + PSNCRepository:PSNCExternalRepository:publications + PSNCRepository +
+ + + + + + + + + + + + + + + + - - - - http://pub.uni-bielefeld.de/oai - oai:pub.uni-bielefeld.de:2578942 - 2018-07-24T13:01:16Z - http://www.openarchives.org/OAI/2.0/oai_dc/ - - - - false - false - 0.9 - - - - -
+ \ No newline at end of file