Implemented a first prototype of incremental harvesting and transformation using a read lock

This commit is contained in:
Sandro La Bruzzo 2021-02-01 13:56:05 +01:00
parent b6b835ef49
commit 6ff234d81b
13 changed files with 297 additions and 132 deletions

View File

@ -98,6 +98,12 @@
<artifactId>dnet-pace-core</artifactId> <artifactId>dnet-pace-core</artifactId>
</dependency> </dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency> <dependency>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId> <artifactId>dhp-schemas</artifactId>

View File

@ -0,0 +1,28 @@
package eu.dnetlib.dhp.aggregation.common;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.SparkSession;
/**
 * Helper methods for the aggregation Spark jobs.
 */
public class AggregationUtility {

	/**
	 * Writes {@code total} as UTF-8 decimal text to the file at {@code path}, using the Hadoop
	 * configuration attached to the given Spark session to resolve the file system.
	 *
	 * @param spark the active Spark session; its Hadoop configuration selects the target file system
	 * @param total the value to persist; must not be {@code null}
	 * @param path  the destination file path
	 * @throws IOException          if the file cannot be created or written
	 * @throws NullPointerException if {@code total} is {@code null}
	 */
	public static void writeTotalSizeOnHDFS(final SparkSession spark, final Long total, final String path)
		throws IOException {
		// The FileSystem handle itself is left open, exactly as before: FileSystem.get may return
		// a shared, cached instance that other code in the same JVM is still using.
		final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());

		// try-with-resources guarantees the streams are closed even when write() fails,
		// which the previous explicit close() on the success path did not.
		try (FSDataOutputStream output = fs.create(new Path(path));
			BufferedOutputStream os = new BufferedOutputStream(output)) {
			os.write(total.toString().getBytes(StandardCharsets.UTF_8));
		}
	}
}

View File

@ -5,9 +5,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.*; import java.io.*;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.Properties;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -20,10 +20,9 @@ import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoder; import org.apache.spark.sql.*;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.util.LongAccumulator; import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Node; import org.dom4j.Node;
@ -34,19 +33,62 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode; import eu.dnetlib.dhp.aggregation.common.AggregationUtility;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.worker.CollectorWorkerApplication; import eu.dnetlib.dhp.collection.worker.CollectorWorkerApplication;
import eu.dnetlib.dhp.common.rest.DNetRestClient;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.model.mdstore.Provenance; import eu.dnetlib.dhp.model.mdstore.Provenance;
import eu.dnetlib.message.MessageManager; import scala.Tuple2;
public class GenerateNativeStoreSparkJob { public class GenerateNativeStoreSparkJob {
private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class); private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class);
private static final String DATASET_NAME = "/store"; private static final String DATASET_NAME = "/store";
/**
 * Typed Spark aggregator that reduces a group of {@link MetadataRecord}s to the single record
 * with the greatest {@code dateOfCollection}.
 *
 * <p>The aggregation buffer is itself a {@link MetadataRecord}; {@code null} means
 * "no record seen yet". When two records have the same collection date, the one already
 * held in the buffer is kept.
 */
public static class MDStoreAggregator extends Aggregator<MetadataRecord, MetadataRecord, MetadataRecord> {

	/**
	 * Returns the initial buffer value.
	 *
	 * @return {@code null}, the neutral element of {@link #reduce} and {@link #merge}
	 */
	@Override
	public MetadataRecord zero() {
		// A freshly constructed MetadataRecord is NOT neutral: it would take part in the date
		// comparison and, on a tie (or if its default date is not the minimum), be returned in
		// place of a real record. getLatestRecord already treats null as "nothing yet".
		return null;
	}

	/**
	 * Folds one input record into the buffer.
	 *
	 * @param b the current buffer, possibly {@code null}
	 * @param a the incoming record, possibly {@code null}
	 * @return the more recently collected of the two
	 */
	@Override
	public MetadataRecord reduce(MetadataRecord b, MetadataRecord a) {
		return getLatestRecord(b, a);
	}

	/**
	 * Combines two partial buffers.
	 *
	 * @param b the first buffer, possibly {@code null}
	 * @param a the second buffer, possibly {@code null}
	 * @return the more recently collected of the two
	 */
	@Override
	public MetadataRecord merge(MetadataRecord b, MetadataRecord a) {
		return getLatestRecord(b, a);
	}

	/**
	 * Picks the record with the strictly greater collection date, preferring {@code b} on ties.
	 * A {@code null} argument always loses to a non-null one; two nulls yield {@code null}.
	 */
	private MetadataRecord getLatestRecord(MetadataRecord b, MetadataRecord a) {
		if (b == null) {
			return a;
		}
		if (a == null) {
			return b;
		}
		return (a.getDateOfCollection() > b.getDateOfCollection()) ? a : b;
	}

	/**
	 * Returns the final buffer unchanged as the aggregation result.
	 */
	@Override
	public MetadataRecord finish(MetadataRecord j) {
		return j;
	}

	/**
	 * @return a Kryo-based encoder for the intermediate buffer
	 */
	@Override
	public Encoder<MetadataRecord> bufferEncoder() {
		return Encoders.kryo(MetadataRecord.class);
	}

	/**
	 * @return a Kryo-based encoder for the aggregation output
	 */
	@Override
	public Encoder<MetadataRecord> outputEncoder() {
		return Encoders.kryo(MetadataRecord.class);
	}
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -70,6 +112,12 @@ public class GenerateNativeStoreSparkJob {
final MDStoreVersion currentVersion = jsonMapper.readValue(mdStoreVersion, MDStoreVersion.class); final MDStoreVersion currentVersion = jsonMapper.readValue(mdStoreVersion, MDStoreVersion.class);
String readMdStoreVersionParam = parser.get("readMdStoreVersion");
log.info("readMdStoreVersion is {}", readMdStoreVersionParam);
final MDStoreVersion readMdStoreVersion = StringUtils.isBlank(readMdStoreVersionParam) ? null
: jsonMapper.readValue(readMdStoreVersionParam, MDStoreVersion.class);
Boolean isSparkSessionManaged = Optional Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged")) .ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf) .map(Boolean::valueOf)
@ -77,6 +125,9 @@ public class GenerateNativeStoreSparkJob {
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(Collections.singleton(MetadataRecord.class).toArray(new Class[] {}));
runWithSparkSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
@ -105,8 +156,27 @@ public class GenerateNativeStoreSparkJob {
.distinct(); .distinct();
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class); final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
Dataset<MetadataRecord> mdstore = spark.createDataset(nativeStore.rdd(), encoder); Dataset<MetadataRecord> mdstore = spark.createDataset(nativeStore.rdd(), encoder);
if (readMdStoreVersion != null) {
// INCREMENTAL MODE
Dataset<MetadataRecord> currentMdStoreVersion = spark
.read()
.load(readMdStoreVersion.getHdfsPath() + DATASET_NAME)
.as(encoder);
TypedColumn<MetadataRecord, MetadataRecord> aggregator = new MDStoreAggregator().toColumn();
mdstore = currentMdStoreVersion
.union(mdstore)
.groupByKey(
(MapFunction<MetadataRecord, String>) MetadataRecord::getId,
Encoders.STRING())
.agg(aggregator)
.map((MapFunction<Tuple2<String, MetadataRecord>, MetadataRecord>) Tuple2::_2, encoder);
}
mdstore mdstore
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
@ -116,17 +186,8 @@ public class GenerateNativeStoreSparkJob {
final Long total = mdstore.count(); final Long total = mdstore.count();
FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration()); AggregationUtility.writeTotalSizeOnHDFS(spark, total, currentVersion.getHdfsPath() + "/size");
FSDataOutputStream output = fs.create(new Path(currentVersion.getHdfsPath() + "/size"));
final BufferedOutputStream os = new BufferedOutputStream(output);
os.write(total.toString().getBytes(StandardCharsets.UTF_8));
os.close();
}); });
} }
public static MetadataRecord parseRecord( public static MetadataRecord parseRecord(

View File

@ -3,14 +3,11 @@ package eu.dnetlib.dhp.transformation;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.ByteArrayInputStream; import java.io.IOException;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
@ -18,25 +15,18 @@ import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator; import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
import eu.dnetlib.dhp.aggregation.common.AggregationUtility;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.vocabulary.VocabularyHelper;
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType;
public class TransformSparkJobNode { public class TransformSparkJobNode {
@ -59,10 +49,14 @@ public class TransformSparkJobNode {
.orElse(Boolean.TRUE); .orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("mdstoreInputPath"); final String mdstoreInputVersion = parser.get("mdstoreInputVersion");
final String outputPath = parser.get("mdstoreOutputPath"); final String mdstoreOutputVersion = parser.get("mdstoreOutputVersion");
// TODO this variable will be used after implementing Messaging with DNet Aggregator // TODO this variable will be used after implementing Messaging with DNet Aggregator
final ObjectMapper jsonMapper = new ObjectMapper();
final MDStoreVersion nativeMdStoreVersion = jsonMapper.readValue(mdstoreInputVersion, MDStoreVersion.class);
final MDStoreVersion cleanedMdStoreVersion = jsonMapper.readValue(mdstoreOutputVersion, MDStoreVersion.class);
final String isLookupUrl = parser.get("isLookupUrl"); final String isLookupUrl = parser.get("isLookupUrl");
log.info(String.format("isLookupUrl: %s", isLookupUrl)); log.info(String.format("isLookupUrl: %s", isLookupUrl));
@ -72,11 +66,14 @@ public class TransformSparkJobNode {
runWithSparkSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> transformRecords(parser.getObjectMap(), isLookupService, spark, inputPath, outputPath)); spark -> transformRecords(
parser.getObjectMap(), isLookupService, spark, nativeMdStoreVersion.getHdfsPath(),
cleanedMdStoreVersion.getHdfsPath()));
} }
public static void transformRecords(final Map<String, String> args, final ISLookUpService isLookUpService, public static void transformRecords(final Map<String, String> args, final ISLookUpService isLookUpService,
final SparkSession spark, final String inputPath, final String outputPath) throws DnetTransformationException { final SparkSession spark, final String inputPath, final String outputPath)
throws DnetTransformationException, IOException {
final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems"); final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems");
final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems"); final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems");
@ -86,11 +83,13 @@ public class TransformSparkJobNode {
final Dataset<MetadataRecord> mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder); final Dataset<MetadataRecord> mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder);
final MapFunction<MetadataRecord, MetadataRecord> XSLTTransformationFunction = TransformationFactory final MapFunction<MetadataRecord, MetadataRecord> XSLTTransformationFunction = TransformationFactory
.getTransformationPlugin(args, ct, isLookUpService); .getTransformationPlugin(args, ct, isLookUpService);
mdstoreInput.map(XSLTTransformationFunction, encoder).write().save(outputPath); mdstoreInput.map(XSLTTransformationFunction, encoder).write().save(outputPath + "/store");
log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Transformed item " + ct.getProcessedItems().count());
log.info("Total item " + ct.getTotalItems().count()); log.info("Total item " + ct.getTotalItems().count());
log.info("Transformation Error item " + ct.getErrorItems().count()); log.info("Transformation Error item " + ct.getErrorItems().count());
AggregationUtility.writeTotalSizeOnHDFS(spark, ct.getProcessedItems().count(), outputPath + "/size");
} }
} }

View File

@ -30,13 +30,13 @@ public class TransformationFactory {
log.info("Transformation plugin required " + transformationPlugin); log.info("Transformation plugin required " + transformationPlugin);
switch (transformationPlugin) { switch (transformationPlugin) {
case "XSLT_TRANSFORM": { case "XSLT_TRANSFORM": {
final String transformationRuleName = jobArgument.get("transformationRuleTitle"); final String transformationRuleId = jobArgument.get("transformationRuleId");
if (StringUtils.isBlank(transformationRuleName)) if (StringUtils.isBlank(transformationRuleId))
throw new DnetTransformationException("Missing Parameter transformationRule"); throw new DnetTransformationException("Missing Parameter transformationRule");
final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService); final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService);
final String transformationRule = queryTransformationRuleFromIS( final String transformationRule = queryTransformationRuleFromIS(
transformationRuleName, isLookupService); transformationRuleId, isLookupService);
final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation"));
return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation,

View File

@ -35,6 +35,12 @@
"paramDescription": "the Metadata Store Version Info", "paramDescription": "the Metadata Store Version Info",
"paramRequired": true "paramRequired": true
}, },
{
"paramName": "rmv",
"paramLongName": "readMdStoreVersion",
"paramDescription": "the Read Lock Metadata Store Version bean",
"paramRequired": false
},
{ {
"paramName": "w", "paramName": "w",
"paramLongName": "workflowId", "paramLongName": "workflowId",

View File

@ -15,4 +15,8 @@
<name>oozie.action.sharelib.for.spark</name> <name>oozie.action.sharelib.for.spark</name>
<value>spark2</value> <value>spark2</value>
</property> </property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration> </configuration>

View File

@ -51,7 +51,7 @@
</global> </global>
<start to="StartTransaction"/> <start to="collection_mode"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>
@ -61,7 +61,7 @@
<switch> <switch>
<case to="StartTransaction">${wf:conf('collectionMode') eq 'REFRESH'}</case> <case to="StartTransaction">${wf:conf('collectionMode') eq 'REFRESH'}</case>
<case to="BeginRead">${wf:conf('collectionMode') eq 'INCREMENTAL'}</case> <case to="BeginRead">${wf:conf('collectionMode') eq 'INCREMENTAL'}</case>
<default to="ImportDatacite"/> <default to="StartTransaction"/>
</switch> </switch>
</decision> </decision>
@ -99,7 +99,7 @@
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg> <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
</java> </java>
<ok to="GenerateNativeStoreSparkJob"/> <ok to="GenerateNativeStoreSparkJob"/>
<error to="RollBack"/> <error to="FailCollection"/>
</action> </action>
<action name="GenerateNativeStoreSparkJob"> <action name="GenerateNativeStoreSparkJob">
@ -123,9 +123,10 @@
<arg>--provenance</arg><arg>${dataSourceInfo}</arg> <arg>--provenance</arg><arg>${dataSourceInfo}</arg>
<arg>--xpath</arg><arg>${identifierPath}</arg> <arg>--xpath</arg><arg>${identifierPath}</arg>
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg> <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
<arg>--readMdStoreVersion</arg><arg>${wf:actionData('BeginRead')['mdStoreReadLockVersion']}</arg>
</spark> </spark>
<ok to="collection_mode_end"/> <ok to="collection_mode_end"/>
<error to="RollBack"/> <error to="FailCollection"/>
</action> </action>
@ -133,7 +134,7 @@
<switch> <switch>
<case to="CommitVersion">${wf:conf('collectionMode') eq 'REFRESH'}</case> <case to="CommitVersion">${wf:conf('collectionMode') eq 'REFRESH'}</case>
<case to="EndRead">${wf:conf('collectionMode') eq 'INCREMENTAL'}</case> <case to="EndRead">${wf:conf('collectionMode') eq 'INCREMENTAL'}</case>
<default to="ImportDatacite"/> <default to="CommitVersion"/>
</switch> </switch>
</decision> </decision>
@ -161,6 +162,28 @@
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<decision name="FailCollection">
<switch>
<case to="RollBack">${wf:conf('collectionMode') eq 'REFRESH'}</case>
<case to="EndReadRollBack">${wf:conf('collectionMode') eq 'INCREMENTAL'}</case>
<default to="RollBack"/>
</switch>
</decision>
<action name="EndReadRollBack">
<java>
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
<arg>--action</arg><arg>READ_UNLOCK</arg>
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
<arg>--readMDStoreId</arg><arg>${wf:actionData('BeginRead')['mdStoreReadLockVersion']}</arg>
<capture-output/>
</java>
<ok to="RollBack"/>
<error to="Kill"/>
</action>
<action name="RollBack"> <action name="RollBack">
<java> <java>
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class> <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>

View File

@ -15,4 +15,8 @@
<name>oozie.action.sharelib.for.spark</name> <name>oozie.action.sharelib.for.spark</name>
<value>spark2</value> <value>spark2</value>
</property> </property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration> </configuration>

View File

@ -1,25 +1,25 @@
<workflow-app name="Transformation_Workflow" xmlns="uri:oozie:workflow:0.5"> <workflow-app name="Transformation_Workflow" xmlns="uri:oozie:workflow:0.5">
<parameters> <parameters>
<property> <property>
<name>mdstoreInputPath</name> <name>mdStoreInputId</name>
<description>the path of the native MDStore</description> <description>the identifier of the native MDStore</description>
</property> </property>
<property> <property>
<name>mdstoreOutputPath</name> <name>mdStoreOutputId</name>
<description>the identifier of the cleaned MDStore</description>
</property>
<property>
<name>mdStoreManagerURI</name>
<description>the path of the cleaned mdstore</description> <description>the URI of the MDStore Manager service</description>
</property> </property>
<property> <property>
<name>transformationRuleTitle</name> <name>transformationRuleId</name>
<description>The transformation Rule to apply</description> <description>The transformation Rule to apply</description>
</property> </property>
<property> <property>
<name>transformationPlugin</name> <name>transformationPlugin</name>
<description>The transformation Plugin</description> <description>The transformation Plugin</description>
</property> </property>
<property> <property>
<name>dateOfTransformation</name> <name>dateOfTransformation</name>
<description>The timestamp of the transformation date</description> <description>The timestamp of the transformation date</description>
@ -28,11 +28,34 @@
</parameters> </parameters>
<start to="TransformJob"/> <start to="BeginRead"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>
<action name="BeginRead">
<java>
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
<arg>--action</arg><arg>READ_LOCK</arg>
<arg>--mdStoreID</arg><arg>${mdStoreInputId}</arg>
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
<capture-output/>
</java>
<ok to="StartTransaction"/>
<error to="Kill"/>
</action>
<action name="StartTransaction">
<java>
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
<arg>--action</arg><arg>NEW_VERSION</arg>
<arg>--mdStoreID</arg><arg>${mdStoreOutputId}</arg>
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
<capture-output/>
</java>
<ok to="TransformJob"/>
<error to="EndReadRollBack"/>
</action>
<action name="TransformJob"> <action name="TransformJob">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
@ -49,18 +72,63 @@
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts> </spark-opts>
<!-- Input is the version read-locked by BeginRead (on mdStoreInputId); output is the new version opened by StartTransaction (on mdStoreOutputId). -->
<arg>--mdstoreInputPath</arg><arg>${mdstoreInputPath}</arg> <arg>--mdstoreInputVersion</arg><arg>${wf:actionData('BeginRead')['mdStoreReadLockVersion']}</arg>
<arg>--mdstoreOutputPath</arg><arg>${mdstoreOutputPath}</arg> <arg>--mdstoreOutputVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
<arg>--dateOfTransformation</arg><arg>${dateOfTransformation}</arg> <arg>--dateOfTransformation</arg><arg>${dateOfTransformation}</arg>
<arg>--transformationPlugin</arg><arg>${transformationPlugin}</arg> <arg>--transformationPlugin</arg><arg>${transformationPlugin}</arg>
<arg>--transformationRuleTitle</arg><arg>${transformationRuleTitle}</arg> <arg>--transformationRuleId</arg><arg>${transformationRuleId}</arg>
</spark> </spark>
<ok to="EndRead"/>
<error to="EndReadRollBack"/>
</action>
<action name="EndRead">
<java>
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
<arg>--action</arg><arg>READ_UNLOCK</arg>
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
<arg>--readMDStoreId</arg><arg>${wf:actionData('BeginRead')['mdStoreReadLockVersion']}</arg>
<capture-output/>
</java>
<ok to="CommitVersion"/>
<error to="Kill"/>
</action>
<action name="CommitVersion">
<java>
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
<arg>--action</arg><arg>COMMIT</arg>
<arg>--namenode</arg><arg>${nameNode}</arg>
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
</java>
<ok to="End"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="EndReadRollBack">
<java>
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
<arg>--action</arg><arg>READ_UNLOCK</arg>
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
<arg>--readMDStoreId</arg><arg>${wf:actionData('BeginRead')['mdStoreReadLockVersion']}</arg>
<capture-output/>
</java>
<ok to="RollBack"/>
<error to="Kill"/>
</action>
<action name="RollBack">
<java>
<main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
<arg>--action</arg><arg>ROLLBACK</arg>
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
<arg>--mdStoreManagerURI</arg><arg>${mdStoreManagerURI}</arg>
</java>
<ok to="Kill"/>
<error to="Kill"/>
</action>
<end name="End"/> <end name="End"/>

View File

@ -13,19 +13,19 @@
}, },
{ {
"paramName": "i", "paramName": "i",
"paramLongName": "mdstoreInputPath", "paramLongName": "mdstoreInputVersion",
"paramDescription": "the path of the sequencial file to read", "paramDescription": "the mdStore Version bean of the Input",
"paramRequired": true "paramRequired": true
}, },
{ {
"paramName": "o", "paramName": "o",
"paramLongName": "mdstoreOutputPath", "paramLongName": "mdstoreOutputVersion",
"paramDescription": "the path of the result DataFrame on HDFS", "paramDescription": "the mdStore Version bean of the Output",
"paramRequired": true "paramRequired": true
}, },
{ {
"paramName": "tr", "paramName": "tr",
"paramLongName": "transformationRuleTitle", "paramLongName": "transformationRuleId",
"paramDescription": "the transformation Rule to apply to the input MDStore", "paramDescription": "the transformation Rule to apply to the input MDStore",
"paramRequired": true "paramRequired": true
}, },

View File

@ -9,7 +9,9 @@
<oai:record> <oai:record>
<xsl:copy-of select="//oai:header"/> <xsl:copy-of select="//oai:header"/>
<metadata> <metadata>
<xsl:for-each select="//oai:set">
<xsl:copy-of select="//oai:metadata/*"/>
<xsl:for-each select="//oai:setSpec">
<dr:CobjCategory><xsl:value-of select="vocabulary:clean(.,'dnet:publication_resource')"/></dr:CobjCategory> <dr:CobjCategory><xsl:value-of select="vocabulary:clean(.,'dnet:publication_resource')"/></dr:CobjCategory>
</xsl:for-each> </xsl:for-each>
</metadata> </metadata>

View File

@ -1,68 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?> <record xmlns="http://www.openarchives.org/OAI/2.0/">
<oai:record xmlns="http://namespace.openaire.eu/" <header>
xmlns:dc="http://purl.org/dc/elements/1.1/" <identifier>oai:lib.psnc.pl:278</identifier>
xmlns:dr="http://www.driver-repository.eu/namespace/dr" <datestamp>2011-08-25T15:17:13Z</datestamp>
xmlns:dri="http://www.driver-repository.eu/namespace/dri" <setSpec>PSNCRepository:PSNCExternalRepository:exhibitions</setSpec>
xmlns:oaf="http://namespace.openaire.eu/oaf" <setSpec>PSNCRepository:PSNCExternalRepository:Departments</setSpec>
xmlns:oai="http://www.openarchives.org/OAI/2.0/" <setSpec>PSNCRepository:PSNCExternalRepository:Departments:NetworkServices</setSpec>
xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <setSpec>PSNCRepository:PSNCExternalRepository</setSpec>
<oai:header> <setSpec>PSNCRepository:PSNCExternalRepository:publications</setSpec>
<dri:objIdentifier>od______2294::00029b7f0a2a7e090e55b625a9079d83</dri:objIdentifier> <setSpec>PSNCRepository</setSpec>
<dri:recordIdentifier>oai:pub.uni-bielefeld.de:2578942</dri:recordIdentifier> </header>
<dri:dateOfCollection>2018-11-23T15:15:33.974+01:00</dri:dateOfCollection> <metadata>
<oaf:datasourceprefix>od______2294</oaf:datasourceprefix> <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:pub.uni-bielefeld.de:2578942</identifier> xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2018-07-24T13:01:16Z</datestamp> xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">conference</setSpec> <dc:title xml:lang="pl">
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">ddc:000</setSpec> <![CDATA[Distributed Search Mechanisms in dLibra Digital Library Framework]]></dc:title>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">conferenceFtxt</setSpec> <dc:creator xml:lang="pl"><![CDATA[Mazurek, Cezary]]></dc:creator>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">driver</setSpec> <dc:creator xml:lang="pl"><![CDATA[Werla, Marcin]]></dc:creator>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">open_access</setSpec> <dc:date xml:lang="pl"><![CDATA[2005.10]]></dc:date>
</oai:header> <dc:type xml:lang="pl"><![CDATA[plakat]]></dc:type>
<metadata xmlns="http://www.openarchives.org/OAI/2.0/"> <dc:format xml:lang="pl"><![CDATA[image/jpeg]]></dc:format>
<oai_dc:dc xmlns="http://www.openarchives.org/OAI/2.0/oai_dc/" <dc:identifier><![CDATA[https://lib.psnc.pl/dlibra/docmetadata?showContent=true&id=278]]></dc:identifier>
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"> <dc:identifier><![CDATA[oai:lib.psnc.pl:278]]></dc:identifier>
<dc:title>Mobile recommendation agents making online use of visual attention information at the point of sale</dc:title> <dc:source xml:lang="pl">
<dc:creator>Pfeiffer, Thies</dc:creator> <![CDATA[CERN Workshop on Innovations in Scholarly Communication (OAI4)]]></dc:source>
<dc:creator>Pfeiffer, Jella</dc:creator> <dc:language xml:lang="pl"><![CDATA[eng]]></dc:language>
<dc:creator>Meißner, Martin</dc:creator> <dc:relation><![CDATA[oai:lib.psnc.pl:publication:339]]></dc:relation>
<dc:creator>Davis, Fred</dc:creator>
<dc:creator>Riedl, René</dc:creator>
<dc:creator>Jan, vom Brocke</dc:creator>
<dc:creator>Léger, Pierre-Majorique</dc:creator>
<dc:creator>Randolph, Adriane</dc:creator>
<dc:subject>Mobile Cognitive Assistance Systems
Information Systems</dc:subject>
<dc:subject>ddc:000</dc:subject>
<dc:description>We aim to utilize online information about visual attention for developing mobile recommendation agents (RAs) for use at the point of sale. Up to now, most RAs are focussed exclusively at personalization in an e-commerce setting. Very little is known, however, about mobile RAs that offer information and assistance at the point of sale based on individual-level feature based preference models (Murray and Häubl 2009). Current attempts provide information about products at the point of sale by manually scanning barcodes or using RFID (Kowatsch et al. 2011, Heijden 2005), e.g. using specific apps for smartphones. We argue that an online access to the current visual attention of the user offers a much larger potential. Integrating mobile eye tracking into ordinary glasses would yield a direct benefit of applying neuroscience methods in the users everyday life. First, learning from consumers attentional processes over time and adapting recommendations based on this learning allows us to provide very accurate and relevant recommendations, potentially increasing the perceived usefulness. Second, our proposed system needs little explicit user input (no scanning or navigation on screen) making it easy to use. Thus, instead of learning from click behaviour and past customer ratings, as it is the case in the e-commerce setting, the mobile RA learns from eye movements by participating online in every day decision processes. We argue that mobile RAs should be built based on current research in human judgment and decision making (Murray et al. 2010). In our project, we therefore follow a two-step approach: In the empirical basic research stream, we aim to understand the users interaction with the product shelf: the actions and patterns of users behaviour (eye movements, gestures, approaching a product closer) and their correspondence to the users informational needs. 
In the empirical system development stream, we create prototypes of mobile RAs and test experimentally the factors that influence the users adoption. For example, we suggest that a users involvement in the process, such as a need for exact nutritional information or for assistance (e.g., reading support for elderly) will influence the users intention to use such as system. The experiments are conducted both in our immersive virtual reality supermarket presented in a CAVE, where we can also easily display information to the user and track the eye movement in great accuracy, as well as in real-world supermarkets (see Figure 1), so that the findings can be better generalized to natural decision situations (Gidlöf et al. 2013). In a first pilot study with five randomly chosen participants in a supermarket, we evaluated which sort of mobile RAs consumers favour in order to get a first impression of the users acceptance of the technology. Figure 1 shows an excerpt of one consumers eye movements during a decision process. First results show long eye cascades and short fixations on many products in situations where users are uncertain and in need for support. Furthermore, we find a surprising acceptance of the technology itself throughout all ages (23 61 years). At the same time, consumers express serious fear of being manipulated by such a technology. For that reason, they strongly prefer the information to be provided by trusted third party or shared with family members and friends (see also Murray and Häubl 2009). Our pilot will be followed by a larger field experiment in March in order to learn more about factors that influence the users acceptance as well as the eye movement patterns that reflect typical phases of decision processes and indicate the need for support by a RA.</dc:description>
<dc:date>2013</dc:date>
<dc:type>info:eu-repo/semantics/conferenceObject</dc:type>
<dc:type>doc-type:conferenceObject</dc:type>
<dc:type>text</dc:type>
<dc:identifier>https://pub.uni-bielefeld.de/record/2578942</dc:identifier>
<dc:identifier>https://pub.uni-bielefeld.de/download/2578942/2602478</dc:identifier>
<dc:source>Pfeiffer T, Pfeiffer J, Meißner M. Mobile recommendation agents making online use of visual attention information at the point of sale. In: Davis F, Riedl R, Jan vom B, Léger P-M, Randolph A, eds. <em>Proceedings of the Gmunden Retreat on NeuroIS 2013</em>. 2013: 3-3.</dc:source>
<dc:language>eng</dc:language>
<dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
</oai_dc:dc> </oai_dc:dc>
</metadata> </metadata>
<about xmlns=""> </record>
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
<originDescription altered="true" harvestDate="2018-11-23T15:15:33.974+01:00">
<baseURL>http://pub.uni-bielefeld.de/oai</baseURL>
<identifier>oai:pub.uni-bielefeld.de:2578942</identifier>
<datestamp>2018-07-24T13:01:16Z</datestamp>
<metadataNamespace>http://www.openarchives.org/OAI/2.0/oai_dc/</metadataNamespace>
</originDescription>
</provenance>
<oaf:datainfo>
<oaf:inferred>false</oaf:inferred>
<oaf:deletedbyinference>false</oaf:deletedbyinference>
<oaf:trust>0.9</oaf:trust>
<oaf:inferenceprovenance/>
<oaf:provenanceaction classid="sysimport:crosswalk:repository"
classname="sysimport:crosswalk:repository"
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
</oaf:datainfo>
</about>
</oai:record>