From 027618003951bcdc51cbd60b7d09163696795386 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 29 Jan 2021 16:42:41 +0100 Subject: [PATCH 01/17] WIP mdstore transaction implemented on hadoop side --- .../mdstore/manager/common/model/MDStore.java | 12 +- .../common/model/MDStoreCurrentVersion.java | 8 +- .../manager/common/model/MDStoreVersion.java | 12 +- .../manager/common/model/MDStoreWithInfo.java | 13 +- .../collector/worker/model/ApiDescriptor.java | 2 +- .../dhp/common/rest/DNetRestClient.java | 54 ++++++ .../mdstore/MDStoreActionNode.java | 164 ++++++++++++++++++ .../GenerateNativeStoreSparkJob.java | 46 ++++- .../collection/plugin/CollectorPlugin.java | 2 +- .../plugin/oai/OaiCollectorPlugin.java | 2 +- .../collection/worker/CollectorWorker.java | 5 +- .../worker/CollectorWorkerApplication.java | 28 ++- .../datacite/oozie_app/config-default.xml | 5 + .../collection_input_parameters.json | 12 +- .../dhp/collection/collector_parameter.json | 28 ++- .../collection/mdstore_action_parameters.json | 45 +++++ .../dhp/collection/oozie_app/workflow.xml | 114 ++++++++++-- .../DnetCollectorWorkerApplicationTests.java | 9 +- 18 files changed, 495 insertions(+), 66 deletions(-) rename dhp-common/src/main/java/eu/dnetlib/{ => dhp}/collector/worker/model/ApiDescriptor.java (93%) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/mdstore_action_parameters.json diff --git a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStore.java b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStore.java index db200cd6a7..59fe941ed5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStore.java +++ b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStore.java @@ -157,7 +157,9 @@ public class MDStore implements Serializable { @Override public String toString() { return String - .format("MDStore [id=%s, format=%s, layout=%s, interpretation=%s, datasourceName=%s, datasourceId=%s, apiId=%s, hdfsPath=%s, creationDate=%s]", id, format, layout, interpretation, datasourceName, datasourceId, apiId, hdfsPath, creationDate); + .format( + "MDStore [id=%s, format=%s, layout=%s, interpretation=%s, datasourceName=%s, datasourceId=%s, apiId=%s, hdfsPath=%s, creationDate=%s]", + id, format, layout, interpretation, datasourceName, datasourceId, apiId, hdfsPath, creationDate); } @Override @@ -167,8 +169,12 @@ public class MDStore implements Serializable { @Override public boolean equals(final Object obj) { - if (this == obj) { return true; } - if (!(obj instanceof MDStore)) { return false; } + if (this == obj) { + return true; + } + if (!(obj instanceof MDStore)) { + return false; + } final MDStore other = (MDStore) obj; return Objects.equals(id, other.id); } diff --git a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreCurrentVersion.java b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreCurrentVersion.java index e25e7dc2ad..d808e2de72 100644 --- a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreCurrentVersion.java +++ b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreCurrentVersion.java @@ -62,8 +62,12 @@ public class MDStoreCurrentVersion implements Serializable { @Override public boolean equals(final Object obj) { - if (this == obj) { return true; } - if (!(obj instanceof MDStoreCurrentVersion)) { return false; } + if (this == obj) { + return true; + } + if (!(obj instanceof MDStoreCurrentVersion)) { + return false; + } final MDStoreCurrentVersion other = (MDStoreCurrentVersion) obj; return Objects.equals(currentVersion, other.currentVersion) && Objects.equals(mdstore, other.mdstore); } diff --git a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreVersion.java b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreVersion.java index 26c34fcad6..38f8f275ee 100644 --- a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreVersion.java +++ b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreVersion.java @@ -116,7 +116,9 @@ public class MDStoreVersion implements Serializable { @Override public String toString() { return String - .format("MDStoreVersion [id=%s, mdstore=%s, writing=%s, readCount=%s, lastUpdate=%s, size=%s, hdfsPath=%s]", id, mdstore, writing, readCount, lastUpdate, size, hdfsPath); + .format( + "MDStoreVersion [id=%s, mdstore=%s, writing=%s, readCount=%s, lastUpdate=%s, size=%s, hdfsPath=%s]", id, + mdstore, writing, readCount, lastUpdate, size, hdfsPath); } @Override @@ -126,8 +128,12 @@ public class MDStoreVersion implements Serializable { @Override public boolean equals(final Object obj) { - if (this == obj) { return true; } - if (!(obj instanceof MDStoreVersion)) { return false; } + if (this == obj) { + return true; + } + if (!(obj instanceof MDStoreVersion)) { + return false; + } final MDStoreVersion other = (MDStoreVersion) obj; return Objects.equals(id, other.id); } diff --git a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreWithInfo.java b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreWithInfo.java index e34e4c000b..510c650928 100644 --- a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreWithInfo.java +++ b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreWithInfo.java @@ -168,7 +168,10 @@ public class MDStoreWithInfo implements Serializable { @Override public String toString() { return String - .format("MDStoreWithInfo [id=%s, format=%s, layout=%s, interpretation=%s, datasourceName=%s, datasourceId=%s, apiId=%s, currentVersion=%s, creationDate=%s, lastUpdate=%s, size=%s, numberOfVersions=%s, hdfsPath=%s]", id, format, layout, interpretation, datasourceName, datasourceId, apiId, currentVersion, creationDate, lastUpdate, size, numberOfVersions, hdfsPath); + .format( + "MDStoreWithInfo [id=%s, format=%s, layout=%s, interpretation=%s, datasourceName=%s, datasourceId=%s, apiId=%s, currentVersion=%s, creationDate=%s, lastUpdate=%s, size=%s, numberOfVersions=%s, hdfsPath=%s]", + id, format, layout, interpretation, datasourceName, datasourceId, apiId, currentVersion, creationDate, + lastUpdate, size, numberOfVersions, hdfsPath); } @Override @@ -178,8 +181,12 @@ public class MDStoreWithInfo implements Serializable { @Override public boolean equals(final Object obj) { - if (this == obj) { return true; } - if (!(obj instanceof MDStoreWithInfo)) { return false; } + if (this == obj) { + return true; + } + if (!(obj instanceof MDStoreWithInfo)) { + return false; + } final MDStoreWithInfo other = (MDStoreWithInfo) obj; return Objects.equals(id, other.id); } diff --git a/dhp-common/src/main/java/eu/dnetlib/collector/worker/model/ApiDescriptor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/collector/worker/model/ApiDescriptor.java similarity index 93% rename from dhp-common/src/main/java/eu/dnetlib/collector/worker/model/ApiDescriptor.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/collector/worker/model/ApiDescriptor.java index bfd70e8c63..8ba30faeb2 100644 --- a/dhp-common/src/main/java/eu/dnetlib/collector/worker/model/ApiDescriptor.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/collector/worker/model/ApiDescriptor.java @@ -1,5 +1,5 @@ -package eu.dnetlib.collector.worker.model; +package eu.dnetlib.dhp.collector.worker.model; import java.util.HashMap; import java.util.Map; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java new file mode 100644 index 0000000000..014f186066 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java @@ -0,0 +1,54 @@ + +package eu.dnetlib.dhp.common.rest; + +import org.apache.commons.io.IOUtils; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; + +import com.fasterxml.jackson.databind.ObjectMapper; + +public class DNetRestClient { + + private static ObjectMapper mapper = new ObjectMapper(); + + public static T doGET(final String url, Class clazz) throws Exception { + final HttpGet httpGet = new HttpGet(url); + return doHTTPRequest(httpGet, clazz); + } + + public static String doGET(final String url) throws Exception { + final HttpGet httpGet = new HttpGet(url); + return doHTTPRequest(httpGet); + } + + public static String doPOST(final String url, V objParam) throws Exception { + final HttpPost httpPost = new HttpPost(url); + + if (objParam != null) { + final StringEntity entity = new StringEntity(mapper.writeValueAsString(objParam)); + httpPost.setEntity(entity); + httpPost.setHeader("Accept", "application/json"); + httpPost.setHeader("Content-type", "application/json"); + } + return doHTTPRequest(httpPost); + } + + public static T doPOST(final String url, V objParam, Class clazz) throws Exception { + return mapper.readValue(doPOST(url, objParam), clazz); + } + + private static String doHTTPRequest(final HttpUriRequest r) throws Exception { + CloseableHttpClient client = HttpClients.createDefault(); + CloseableHttpResponse response = client.execute(r); + return IOUtils.toString(response.getEntity().getContent()); + } + + private static T doHTTPRequest(final HttpUriRequest r, Class clazz) throws Exception { + return mapper.readValue(doHTTPRequest(r), clazz); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java new file mode 100644 index 0000000000..d4824ed0a1 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java @@ -0,0 +1,164 @@ + +package eu.dnetlib.dhp.aggregation.mdstore; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.net.URI; +import java.util.Properties; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.collection.worker.CollectorWorker; +import eu.dnetlib.dhp.common.rest.DNetRestClient; + +public class MDStoreActionNode { + private static final Logger log = LoggerFactory.getLogger(MDStoreActionNode.class); + + enum MDAction { + NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK + + } + + private static final ObjectMapper mapper = new ObjectMapper(); + + public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; + + public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; + public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; + + public static final String READ_LOCK_URL = "%s/mdstores/mdstore/%s/startReading"; + public static final String READ_UNLOCK_URL = "%s/mdstores/version/%s/endReading"; + + private static final String MDSTOREVERSIONPARAM = "mdStoreVersion"; + private static final String MDSTOREREADLOCKPARAM = "mdStoreReadLockVersion"; + + public static void main(String[] args) throws Exception { + final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser( + IOUtils + .toString( + CollectorWorker.class + .getResourceAsStream( + "/eu/dnetlib/dhp/collection/mdstore_action_parameters.json"))); + argumentParser.parseArgument(args); + + final MDAction action = MDAction.valueOf(argumentParser.get("action")); + log.info("Curren action is {}", action); + + final String mdStoreManagerURI = argumentParser.get("mdStoreManagerURI"); + log.info("mdStoreManagerURI is {}", mdStoreManagerURI); + + switch (action) { + case NEW_VERSION: { + final String mdStoreID = argumentParser.get("mdStoreID"); + if (StringUtils.isBlank(mdStoreID)) { + throw new IllegalArgumentException("missing or empty argument mdStoreId"); + } + final MDStoreVersion currentVersion = DNetRestClient + .doGET(String.format(NEW_VERSION_URI, mdStoreManagerURI, mdStoreID), MDStoreVersion.class); + populateOOZIEEnv(MDSTOREVERSIONPARAM, mapper.writeValueAsString(currentVersion)); + break; + } + case COMMIT: { + + final String hdfsuri = argumentParser.get("namenode"); + if (StringUtils.isBlank(hdfsuri)) { + throw new IllegalArgumentException("missing or empty argument namenode"); + } + final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final MDStoreVersion mdStoreVersion = mapper.readValue(mdStoreVersion_params, MDStoreVersion.class); + + if (StringUtils.isBlank(mdStoreVersion.getId())) { + throw new IllegalArgumentException( + "invalid MDStoreVersion value current is " + mdStoreVersion_params); + } + + Configuration conf = new Configuration(); + // Set FileSystem URI + conf.set("fs.defaultFS", hdfsuri); + // Because of Maven + conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); + conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); + + System.setProperty("hadoop.home.dir", "/"); + // Get the filesystem - HDFS + FileSystem fs = FileSystem.get(URI.create(hdfsuri), conf); + String mdStoreSizeParam = argumentParser.get("mdStoreSize"); + + if (StringUtils.isBlank(mdStoreSizeParam)) { + throw new IllegalArgumentException("missing or empty argument mdStoreSize"); + } + Path hdfstoreSizepath = new Path(mdStoreVersion.getHdfsPath() + "/size"); + + FSDataInputStream inputStream = fs.open(hdfstoreSizepath); + + final Long mdStoreSize = Long.parseLong(IOUtils.toString(inputStream)); + + inputStream.close(); + fs.create(hdfstoreSizepath); + + DNetRestClient + .doGET(String.format(COMMIT_VERSION_URL, mdStoreManagerURI, mdStoreVersion.getId(), mdStoreSize)); + break; + } + case ROLLBACK: { + final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final MDStoreVersion mdStoreVersion = mapper.readValue(mdStoreVersion_params, MDStoreVersion.class); + + if (StringUtils.isBlank(mdStoreVersion.getId())) { + throw new IllegalArgumentException( + "invalid MDStoreVersion value current is " + mdStoreVersion_params); + } + DNetRestClient.doGET(String.format(ROLLBACK_VERSION_URL, mdStoreManagerURI, mdStoreVersion.getId())); + break; + } + + case READ_LOCK: { + final String mdStoreID = argumentParser.get("mdStoreID"); + if (StringUtils.isBlank(mdStoreID)) { + throw new IllegalArgumentException("missing or empty argument mdStoreId"); + } + final MDStoreVersion currentVersion = DNetRestClient + .doGET(String.format(READ_LOCK_URL, mdStoreManagerURI, mdStoreID), MDStoreVersion.class); + populateOOZIEEnv(MDSTOREREADLOCKPARAM, mapper.writeValueAsString(currentVersion)); + break; + } + case READ_UNLOCK: { + final String mdStoreVersion_params = argumentParser.get("readMDStoreId"); + final MDStoreVersion mdStoreVersion = mapper.readValue(mdStoreVersion_params, MDStoreVersion.class); + + if (StringUtils.isBlank(mdStoreVersion.getId())) { + throw new IllegalArgumentException( + "invalid MDStoreVersion value current is " + mdStoreVersion_params); + } + DNetRestClient.doGET(String.format(READ_UNLOCK_URL, mdStoreManagerURI, mdStoreVersion.getId())); + break; + } + + default: + throw new IllegalArgumentException("invalid action"); + } + + } + + public static void populateOOZIEEnv(final String paramName, String value) throws Exception { + File file = new File(System.getProperty("oozie.action.output.properties")); + Properties props = new Properties(); + + props.setProperty(paramName, value); + OutputStream os = new FileOutputStream(file); + props.store(os, ""); + os.close(); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index c9c29b4ea0..b28327a405 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -3,13 +3,17 @@ package eu.dnetlib.dhp.collection; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.ByteArrayInputStream; +import java.io.*; import java.nio.charset.StandardCharsets; import java.util.Objects; import java.util.Optional; +import java.util.Properties; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.spark.SparkConf; @@ -19,6 +23,7 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; import org.dom4j.Node; @@ -28,7 +33,11 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; +import eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.collection.worker.CollectorWorkerApplication; +import eu.dnetlib.dhp.common.rest.DNetRestClient; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.Provenance; import eu.dnetlib.message.MessageManager; @@ -36,6 +45,7 @@ import eu.dnetlib.message.MessageManager; public class GenerateNativeStoreSparkJob { private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class); + private static final String DATASET_NAME = "/store"; public static void main(String[] args) throws Exception { @@ -50,11 +60,15 @@ public class GenerateNativeStoreSparkJob { final String provenanceArgument = parser.get("provenance"); log.info("Provenance is {}", provenanceArgument); final Provenance provenance = jsonMapper.readValue(provenanceArgument, Provenance.class); + final String dateOfCollectionArgs = parser.get("dateOfCollection"); log.info("dateOfCollection is {}", dateOfCollectionArgs); final long dateOfCollection = new Long(dateOfCollectionArgs); - final String sequenceFileInputPath = parser.get("input"); - log.info("sequenceFileInputPath is {}", dateOfCollectionArgs); + + String mdStoreVersion = parser.get("mdStoreVersion"); + log.info("mdStoreVersion is {}", mdStoreVersion); + + final MDStoreVersion currentVersion = jsonMapper.readValue(mdStoreVersion, MDStoreVersion.class); Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) @@ -70,7 +84,9 @@ public class GenerateNativeStoreSparkJob { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaPairRDD inputRDD = sc - .sequenceFile(sequenceFileInputPath, IntWritable.class, Text.class); + .sequenceFile( + currentVersion.getHdfsPath() + CollectorWorkerApplication.SEQUENTIAL_FILE_NAME, + IntWritable.class, Text.class); final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems"); final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords"); @@ -89,12 +105,26 @@ public class GenerateNativeStoreSparkJob { .distinct(); final Encoder encoder = Encoders.bean(MetadataRecord.class); - final Dataset mdstore = spark.createDataset(nativeStore.rdd(), encoder); - final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords"); - mdStoreRecords.add(mdstore.count()); + Dataset mdstore = spark.createDataset(nativeStore.rdd(), encoder); - mdstore.write().format("parquet").save(parser.get("output")); + mdstore + .write() + .mode(SaveMode.Overwrite) + .format("parquet") + .save(currentVersion.getHdfsPath() + DATASET_NAME); + mdstore = spark.read().load(currentVersion.getHdfsPath() + DATASET_NAME).as(encoder); + final Long total = mdstore.count(); + + FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration()); + + FSDataOutputStream output = fs.create(new Path(currentVersion.getHdfsPath() + "/size")); + + final BufferedOutputStream os = new BufferedOutputStream(output); + + os.write(total.toString().getBytes(StandardCharsets.UTF_8)); + + os.close(); }); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index 7146e610ed..ba9bd662e0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -3,8 +3,8 @@ package eu.dnetlib.dhp.collection.plugin; import java.util.stream.Stream; -import eu.dnetlib.collector.worker.model.ApiDescriptor; import eu.dnetlib.dhp.collection.worker.CollectorException; +import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; public interface CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index c4c52271a0..a5e2615536 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -13,9 +13,9 @@ import com.google.common.base.Splitter; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; -import eu.dnetlib.collector.worker.model.ApiDescriptor; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.worker.CollectorException; +import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; public class OaiCollectorPlugin implements CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java index 380db641a8..3605bdfd6c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java @@ -14,12 +14,9 @@ import org.apache.hadoop.io.Text; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.collector.worker.model.ApiDescriptor; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; +import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; public class CollectorWorker { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java index 5e8d0f9c23..29ae98c5bd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java @@ -1,15 +1,22 @@ package eu.dnetlib.dhp.collection.worker; +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.util.Properties; + import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.collector.worker.model.ApiDescriptor; +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; +import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; +import eu.dnetlib.dhp.common.rest.DNetRestClient; /** * DnetCollectortWorkerApplication is the main class responsible to start the Dnet Collection into HDFS. This module @@ -24,6 +31,8 @@ public class CollectorWorkerApplication { private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory(); + public static String SEQUENTIAL_FILE_NAME = "/sequence_file"; + /** * @param args */ @@ -38,18 +47,23 @@ public class CollectorWorkerApplication { argumentParser.parseArgument(args); final String hdfsuri = argumentParser.get("namenode"); - log.info("hdfsURI is {}", hdfsuri); - final String hdfsPath = argumentParser.get("hdfsPath"); - log.info("hdfsPath is {}" + hdfsPath); + final String apiDescriptor = argumentParser.get("apidescriptor"); - log.info("apiDescriptor is {}" + apiDescriptor); + log.info("apiDescriptor is {}", apiDescriptor); + + final String mdStoreVersion = argumentParser.get("mdStoreVersion"); + log.info("mdStoreVersion is {}", mdStoreVersion); final ObjectMapper jsonMapper = new ObjectMapper(); - final ApiDescriptor api = jsonMapper.readValue(apiDescriptor, ApiDescriptor.class); + final MDStoreVersion currentVersion = jsonMapper.readValue(mdStoreVersion, MDStoreVersion.class); - final CollectorWorker worker = new CollectorWorker(collectorPluginFactory, api, hdfsuri, hdfsPath); + final ApiDescriptor api = jsonMapper.readValue(apiDescriptor, ApiDescriptor.class); + final CollectorWorker worker = new CollectorWorker(collectorPluginFactory, api, hdfsuri, + currentVersion.getHdfsPath() + SEQUENTIAL_FILE_NAME); worker.collect(); + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml index 2e0ed9aeea..dd3c32c620 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml @@ -15,4 +15,9 @@ oozie.action.sharelib.for.spark spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json index 7f51139307..c1aa03bcdd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json @@ -30,15 +30,9 @@ "paramRequired": true }, { - "paramName": "i", - "paramLongName": "input", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "o", - "paramLongName": "output", - "paramDescription": "the path of the result DataFrame on HDFS", + "paramName": "mv", + "paramLongName": "mdStoreVersion", + "paramDescription": "the Metadata Store Version Info", "paramRequired": true }, { diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collector_parameter.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collector_parameter.json index 901664e0df..60e9762ff2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collector_parameter.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collector_parameter.json @@ -1,6 +1,26 @@ [ - {"paramName":"p", "paramLongName":"hdfsPath", "paramDescription": "the path where storing the sequential file", "paramRequired": true}, - {"paramName":"a", "paramLongName":"apidescriptor", "paramDescription": "the JSON encoding of the API Descriptor", "paramRequired": true}, - {"paramName":"n", "paramLongName":"namenode", "paramDescription": "the Name Node URI", "paramRequired": true}, - {"paramName":"w", "paramLongName":"workflowId", "paramDescription": "the identifier of the dnet Workflow", "paramRequired": false} + { + "paramName": "a", + "paramLongName": "apidescriptor", + "paramDescription": "the JSON encoding of the API Descriptor", + "paramRequired": true + }, + { + "paramName": "n", + "paramLongName": "namenode", + "paramDescription": "the Name Node URI", + "paramRequired": true + }, + { + "paramName": "mv", + "paramLongName": "mdStoreVersion", + "paramDescription": "the MDStore Version bean", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workflowId", + "paramDescription": "the identifier of the dnet Workflow", + "paramRequired": false + } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/mdstore_action_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/mdstore_action_parameters.json new file mode 100644 index 0000000000..57a218a342 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/mdstore_action_parameters.json @@ -0,0 +1,45 @@ +[ + { + "paramName": "a", + "paramLongName": "action", + "paramDescription": "the JSON encoding of the API Descriptor", + "paramRequired": true + }, + { + "paramName": "mu", + "paramLongName": "mdStoreManagerURI", + "paramDescription": "the MDStore Manager URI", + "paramRequired": true + }, + { + "paramName": "mi", + "paramLongName": "mdStoreID", + "paramDescription": "the Metadata Store ID", + "paramRequired": false + }, + { + "paramName": "ms", + "paramLongName": "mdStoreSize", + "paramDescription": "the Metadata Store Size", + "paramRequired": false + }, + { + "paramName": "mv", + "paramLongName": "mdStoreVersion", + "paramDescription": "the Metadata Version Bean", + "paramRequired": false + }, + { + "paramName": "n", + "paramLongName": "namenode", + "paramDescription": "the Name Node URI", + "paramRequired": false + }, + { + "paramName": "rm", + "paramLongName": "readMDStoreId", + "paramDescription": "the ID Locked to Read", + "paramRequired": false + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml index 38cd83da76..28abe0965f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml @@ -1,10 +1,5 @@ - - mdStorePath - the path of the native mdstore - - apiDescription A json encoding of the API Description class @@ -16,7 +11,7 @@ identifierPath - An xpath to retrieve the metadata idnentifier for the generation of DNet Identifier + An xpath to retrieve the metadata identifier for the generation of DNet Identifier @@ -33,26 +28,78 @@ workflowId The identifier of the workflow + + + mdStoreID + The identifier of the mdStore + + + + mdStoreManagerURI + The URI of the MDStore Manager + + + + collectionMode + Should be Refresh or Incremental + + ${jobTracker} ${nameNode} - + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + ${wf:conf('collectionMode') eq 'REFRESH'} + ${wf:conf('collectionMode') eq 'INCREMENTAL'} + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_LOCK + --mdStoreID${mdStoreID} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionNEW_VERSION + --mdStoreID${mdStoreID} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + eu.dnetlib.dhp.collection.worker.CollectorWorkerApplication - --hdfsPath${workingDir}/sequenceFile_${mdstoreVersion} --apidescriptor${apiDescription} --namenode${nameNode} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} - + @@ -75,13 +122,56 @@ --dateOfCollection${timestamp} --provenance${dataSourceInfo} --xpath${identifierPath} - --input${workingDir}/sequenceFile - --output${mdStorePath} - -w${workflowId} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + + + + + + + + ${wf:conf('collectionMode') eq 'REFRESH'} + ${wf:conf('collectionMode') eq 'INCREMENTAL'} + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_UNLOCK + --mdStoreManagerURI${mdStoreManagerURI} + --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} + + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionCOMMIT + --namenode${nameNode} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionCOMMIT + --mdStoreVersion${wf:actionData('CollectionWorker')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java index fc19f20647..9abfbacacf 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java @@ -2,25 +2,18 @@ package eu.dnetlib.dhp.collector.worker; import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.mockito.Mockito.*; -import java.io.File; import java.nio.file.Path; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.collector.worker.model.ApiDescriptor; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.collection.worker.CollectorWorker; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; -import eu.dnetlib.message.Message; -import eu.dnetlib.message.MessageManager; +import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; @Disabled public class DnetCollectorWorkerApplicationTests { From 8ee82576c686fb7f7ff6c840ce91b6e6809d1dc8 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 29 Jan 2021 17:02:46 +0100 Subject: [PATCH 02/17] Collection on Refresh WORKS!!! --- .../dhp/aggregation/mdstore/MDStoreActionNode.java | 8 ++------ .../dnetlib/dhp/collection/CollectionJobTest.java | 13 +++++++++++++ .../resources/eu/dnetlib/dhp/collection/input.json | 9 +++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/input.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java index d4824ed0a1..6cb0537b2e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java @@ -38,8 +38,8 @@ public class MDStoreActionNode { public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; - public static final String READ_LOCK_URL = "%s/mdstores/mdstore/%s/startReading"; - public static final String READ_UNLOCK_URL = "%s/mdstores/version/%s/endReading"; + public static final String READ_LOCK_URL = "%s/mdstore/%s/startReading"; + public static final String READ_UNLOCK_URL = "%s/version/%s/endReading"; private static final String MDSTOREVERSIONPARAM = "mdStoreVersion"; private static final String MDSTOREREADLOCKPARAM = "mdStoreReadLockVersion"; @@ -94,11 +94,7 @@ public class MDStoreActionNode { System.setProperty("hadoop.home.dir", "/"); // Get the filesystem - HDFS FileSystem fs = FileSystem.get(URI.create(hdfsuri), conf); - String mdStoreSizeParam = argumentParser.get("mdStoreSize"); - if (StringUtils.isBlank(mdStoreSizeParam)) { - throw new IllegalArgumentException("missing or empty argument mdStoreSize"); - } Path hdfstoreSizepath = new Path(mdStoreVersion.getHdfsPath() + "/size"); FSDataInputStream inputStream = fs.open(hdfstoreSizepath); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionJobTest.java index c3b05f5c91..6f7bb2bc2d 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionJobTest.java @@ -16,6 +16,8 @@ import org.junit.jupiter.api.io.TempDir; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreCurrentVersion; +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.Provenance; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -37,6 +39,17 @@ public class CollectionJobTest { spark.stop(); } + @Test + public void testJSONSerialization() throws Exception { + final String s = IOUtils.toString(getClass().getResourceAsStream("input.json")); + System.out.println("s = " + s); + final ObjectMapper mapper = new ObjectMapper(); + MDStoreVersion mi = mapper.readValue(s, MDStoreVersion.class); + + assertNotNull(mi); + + } + @Test public void tesCollection(@TempDir Path testDir) throws Exception { final Provenance provenance = new Provenance("pippo", "puppa", "ns_prefix"); diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/input.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/input.json new file mode 100644 index 0000000000..4ffc33d247 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/input.json @@ -0,0 +1,9 @@ +{ + "id": "md-7557225f-77cc-407d-bdf4-d2fe03131464-1611935085410", + "mdstore": "md-7557225f-77cc-407d-bdf4-d2fe03131464", + "writing": true, + "readCount": 0, + "lastUpdate": null, + "size": 0, + "hdfsPath": "/data/dnet.dev/mdstore/md-7557225f-77cc-407d-bdf4-d2fe03131464/md-7557225f-77cc-407d-bdf4-d2fe03131464-1611935085410" +} \ No newline at end of file From e423634cb6f1a7c301e5af04a9de8246e1e53d0c Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 29 Jan 2021 17:21:42 +0100 Subject: [PATCH 03/17] RollBack in case of error WORKS!!! --- .../eu/dnetlib/dhp/collection/oozie_app/workflow.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml index 28abe0965f..527ec17276 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml @@ -41,7 +41,7 @@ collectionMode - Should be Refresh or Incremental + Should be REFRESH or INCREMENTAL @@ -164,8 +164,8 @@ eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode - --actionCOMMIT - --mdStoreVersion${wf:actionData('CollectionWorker')['mdStoreVersion']} + --actionROLLBACK + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} --mdStoreManagerURI${mdStoreManagerURI} From b6b835ef49f3977cd43f1e5a1087720015e3a1ee Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 1 Feb 2021 08:49:42 +0100 Subject: [PATCH 04/17] update transformation Factory to get Transformation Rule by Id and not by Title --- .../dnetlib/dhp/transformation/TransformationFactory.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index 58292139a6..fbaef1d1f9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -18,7 +18,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class TransformationFactory { private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class); - public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//TITLE = \"%s\" return $x//CODE/text()"; + public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/text()"; public static MapFunction getTransformationPlugin( final Map jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) @@ -54,15 +54,15 @@ public class TransformationFactory { } } - private static String queryTransformationRuleFromIS(final String transformationRuleName, + private static String queryTransformationRuleFromIS(final String transformationRuleId, final ISLookUpService isLookUpService) throws Exception { - final String query = String.format(TRULE_XQUERY, transformationRuleName); + final String query = String.format(TRULE_XQUERY, transformationRuleId); log.info("asking query to IS: " + query); List result = isLookUpService.quickSearchProfile(query); if (result == null || result.isEmpty()) throw new DnetTransformationException( - "Unable to find transformation rule with name: " + transformationRuleName); + "Unable to find transformation rule with name: " + transformationRuleId); return result.get(0); } From 6ff234d81bd4cdec1c1c1745b24a7c3901e90afb Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 1 Feb 2021 13:56:05 +0100 Subject: [PATCH 05/17] Implemented a first prototype of incremental harvesting and trasformation using readlock --- dhp-common/pom.xml | 6 ++ .../common/AggregationUtility.java | 28 ++++++ .../GenerateNativeStoreSparkJob.java | 97 +++++++++++++++---- .../transformation/TransformSparkJobNode.java | 39 ++++---- .../transformation/TransformationFactory.java | 6 +- .../collection_input_parameters.json | 6 ++ .../collection/oozie_app/config-default.xml | 4 + .../dhp/collection/oozie_app/workflow.xml | 33 ++++++- .../oozie_app/config-default.xml | 4 + .../dhp/transformation/oozie_app/workflow.xml | 96 +++++++++++++++--- .../transformation_input_parameters.json | 10 +- .../eu/dnetlib/dhp/transform/ext_simple.xsl | 4 +- .../eu/dnetlib/dhp/transform/input.xml | 96 ++++++------------ 13 files changed, 297 insertions(+), 132 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index b295bc1f19..6eb2e0358d 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -98,6 +98,12 @@ dnet-pace-core + + org.apache.httpcomponents + httpclient + + + eu.dnetlib.dhp dhp-schemas diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java new file mode 100644 index 0000000000..1f5ed27cb1 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java @@ -0,0 +1,28 @@ + +package eu.dnetlib.dhp.aggregation.common; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.spark.sql.SparkSession; + +public class AggregationUtility { + + public static void writeTotalSizeOnHDFS(final SparkSession spark, final Long total, final String path) + throws IOException { + + FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration()); + + FSDataOutputStream output = fs.create(new Path(path)); + + final BufferedOutputStream os = new BufferedOutputStream(output); + + os.write(total.toString().getBytes(StandardCharsets.UTF_8)); + + os.close(); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index b28327a405..466ddcd21f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -5,9 +5,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.*; import java.nio.charset.StandardCharsets; +import java.util.Collections; import java.util.Objects; import java.util.Optional; -import java.util.Properties; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -20,10 +20,9 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoder; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.apache.spark.sql.expressions.Aggregator; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; import org.dom4j.Node; @@ -34,19 +33,62 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; -import eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode; +import eu.dnetlib.dhp.aggregation.common.AggregationUtility; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.collection.worker.CollectorWorkerApplication; -import eu.dnetlib.dhp.common.rest.DNetRestClient; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.Provenance; -import eu.dnetlib.message.MessageManager; +import scala.Tuple2; public class GenerateNativeStoreSparkJob { private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class); private static final String DATASET_NAME = "/store"; + public static class MDStoreAggregator extends Aggregator { + + @Override + public MetadataRecord zero() { + return new MetadataRecord(); + } + + @Override + public MetadataRecord reduce(MetadataRecord b, MetadataRecord a) { + + return getLatestRecord(b, a); + } + + private MetadataRecord getLatestRecord(MetadataRecord b, MetadataRecord a) { + if (b == null) + return a; + + if (a == null) + return b; + return (a.getDateOfCollection() > b.getDateOfCollection()) ? a : b; + } + + @Override + public MetadataRecord merge(MetadataRecord b, MetadataRecord a) { + return getLatestRecord(b, a); + } + + @Override + public MetadataRecord finish(MetadataRecord j) { + return j; + } + + @Override + public Encoder bufferEncoder() { + return Encoders.kryo(MetadataRecord.class); + } + + @Override + public Encoder outputEncoder() { + return Encoders.kryo(MetadataRecord.class); + } + + } + public static void main(String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -70,6 +112,12 @@ public class GenerateNativeStoreSparkJob { final MDStoreVersion currentVersion = jsonMapper.readValue(mdStoreVersion, MDStoreVersion.class); + String readMdStoreVersionParam = parser.get("readMdStoreVersion"); + log.info("readMdStoreVersion is {}", readMdStoreVersionParam); + + final MDStoreVersion readMdStoreVersion = StringUtils.isBlank(readMdStoreVersionParam) ? null + : jsonMapper.readValue(readMdStoreVersionParam, MDStoreVersion.class); + Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) @@ -77,6 +125,9 @@ public class GenerateNativeStoreSparkJob { log.info("isSparkSessionManaged: {}", isSparkSessionManaged); SparkConf conf = new SparkConf(); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(Collections.singleton(MetadataRecord.class).toArray(new Class[] {})); + runWithSparkSession( conf, isSparkSessionManaged, @@ -105,8 +156,27 @@ public class GenerateNativeStoreSparkJob { .distinct(); final Encoder encoder = Encoders.bean(MetadataRecord.class); + Dataset mdstore = spark.createDataset(nativeStore.rdd(), encoder); + if (readMdStoreVersion != null) { + // INCREMENTAL MODE + + Dataset currentMdStoreVersion = spark + .read() + .load(readMdStoreVersion.getHdfsPath() + DATASET_NAME) + .as(encoder); + TypedColumn aggregator = new MDStoreAggregator().toColumn(); + + mdstore = currentMdStoreVersion + .union(mdstore) + .groupByKey( + (MapFunction) MetadataRecord::getId, + Encoders.STRING()) + .agg(aggregator) + .map((MapFunction, MetadataRecord>) Tuple2::_2, encoder); + + } mdstore .write() .mode(SaveMode.Overwrite) @@ -116,17 +186,8 @@ public class GenerateNativeStoreSparkJob { final Long total = mdstore.count(); - FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration()); - - FSDataOutputStream output = fs.create(new Path(currentVersion.getHdfsPath() + "/size")); - - final BufferedOutputStream os = new BufferedOutputStream(output); - - os.write(total.toString().getBytes(StandardCharsets.UTF_8)); - - os.close(); + AggregationUtility.writeTotalSizeOnHDFS(spark, total, currentVersion.getHdfsPath() + "/size"); }); - } public static MetadataRecord parseRecord( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index c6ed5a1e3a..b9df902a1d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -3,14 +3,11 @@ package eu.dnetlib.dhp.transformation; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.ByteArrayInputStream; -import java.util.HashMap; +import java.io.IOException; import java.util.Map; -import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -18,25 +15,18 @@ import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.apache.spark.util.LongAccumulator; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Node; -import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; +import eu.dnetlib.dhp.aggregation.common.AggregationUtility; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; -import eu.dnetlib.dhp.transformation.vocabulary.VocabularyHelper; -import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; -import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.message.Message; -import eu.dnetlib.message.MessageManager; -import eu.dnetlib.message.MessageType; public class TransformSparkJobNode { @@ -59,10 +49,14 @@ public class TransformSparkJobNode { .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String inputPath = parser.get("mdstoreInputPath"); - final String outputPath = parser.get("mdstoreOutputPath"); + final String mdstoreInputVersion = parser.get("mdstoreInputVersion"); + final String mdstoreOutputVersion = parser.get("mdstoreOutputVersion"); // TODO this variable will be used after implementing Messaging with DNet Aggregator + final ObjectMapper jsonMapper = new ObjectMapper(); + final MDStoreVersion nativeMdStoreVersion = jsonMapper.readValue(mdstoreInputVersion, MDStoreVersion.class); + final MDStoreVersion cleanedMdStoreVersion = jsonMapper.readValue(mdstoreOutputVersion, MDStoreVersion.class); + final String isLookupUrl = parser.get("isLookupUrl"); log.info(String.format("isLookupUrl: %s", isLookupUrl)); @@ -72,11 +66,14 @@ public class TransformSparkJobNode { runWithSparkSession( conf, isSparkSessionManaged, - spark -> transformRecords(parser.getObjectMap(), isLookupService, spark, inputPath, outputPath)); + spark -> transformRecords( + parser.getObjectMap(), isLookupService, spark, nativeMdStoreVersion.getHdfsPath(), + cleanedMdStoreVersion.getHdfsPath())); } public static void transformRecords(final Map args, final ISLookUpService isLookUpService, - final SparkSession spark, final String inputPath, final String outputPath) throws DnetTransformationException { + final SparkSession spark, final String inputPath, final String outputPath) + throws DnetTransformationException, IOException { final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems"); final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems"); @@ -86,11 +83,13 @@ public class TransformSparkJobNode { final Dataset mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder); final MapFunction XSLTTransformationFunction = TransformationFactory .getTransformationPlugin(args, ct, isLookUpService); - mdstoreInput.map(XSLTTransformationFunction, encoder).write().save(outputPath); + mdstoreInput.map(XSLTTransformationFunction, encoder).write().save(outputPath + "/store"); log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Total item " + ct.getTotalItems().count()); log.info("Transformation Error item " + ct.getErrorItems().count()); + + AggregationUtility.writeTotalSizeOnHDFS(spark, ct.getProcessedItems().count(), outputPath + "/size"); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index fbaef1d1f9..d1f8969647 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -30,13 +30,13 @@ public class TransformationFactory { log.info("Transformation plugin required " + transformationPlugin); switch (transformationPlugin) { case "XSLT_TRANSFORM": { - final String transformationRuleName = jobArgument.get("transformationRuleTitle"); - if (StringUtils.isBlank(transformationRuleName)) + final String transformationRuleId = jobArgument.get("transformationRuleId"); + if (StringUtils.isBlank(transformationRuleId)) throw new DnetTransformationException("Missing Parameter transformationRule"); final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService); final String transformationRule = queryTransformationRuleFromIS( - transformationRuleName, isLookupService); + transformationRuleId, isLookupService); final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json index c1aa03bcdd..987f004bbc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json @@ -35,6 +35,12 @@ "paramDescription": "the Metadata Store Version Info", "paramRequired": true }, + { + "paramName": "rmv", + "paramLongName": "readMdStoreVersion", + "paramDescription": "the Read Lock Metadata Store Version bean", + "paramRequired": false + }, { "paramName": "w", "paramLongName": "workflowId", diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/config-default.xml index 2e0ed9aeea..e77dd09c9d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/config-default.xml @@ -15,4 +15,8 @@ oozie.action.sharelib.for.spark spark2 + + oozie.launcher.mapreduce.user.classpath.first + true + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml index 527ec17276..9c213bee5f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml @@ -51,7 +51,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -61,7 +61,7 @@ ${wf:conf('collectionMode') eq 'REFRESH'} ${wf:conf('collectionMode') eq 'INCREMENTAL'} - + @@ -99,7 +99,7 @@ --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} - + @@ -123,9 +123,10 @@ --provenance${dataSourceInfo} --xpath${identifierPath} --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --readMdStoreVersion${wf:actionData('BeginRead')['mdStoreReadLockVersion']} - + @@ -133,7 +134,7 @@ ${wf:conf('collectionMode') eq 'REFRESH'} ${wf:conf('collectionMode') eq 'INCREMENTAL'} - + @@ -161,6 +162,28 @@ + + + ${wf:conf('collectionMode') eq 'REFRESH'} + ${wf:conf('collectionMode') eq 'INCREMENTAL'} + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_UNLOCK + --mdStoreManagerURI${mdStoreManagerURI} + --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml index 2e0ed9aeea..e77dd09c9d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml @@ -15,4 +15,8 @@ oozie.action.sharelib.for.spark spark2 + + oozie.launcher.mapreduce.user.classpath.first + true + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml index b36bc37661..aff87dc79e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml @@ -1,25 +1,25 @@ - mdstoreInputPath - the path of the native MDStore + mdStoreInputId + the identifier of the native MDStore - - mdstoreOutputPath + mdStoreOutputId + the identifier of the cleaned MDStore + + + mdStoreManagerURI the path of the cleaned mdstore - - transformationRuleTitle + transformationRuleId The transformation Rule to apply - transformationPlugin The transformation Plugin - dateOfTransformation The timestamp of the transformation date @@ -28,11 +28,34 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_LOCK + --mdStoreID${mdStoreInputId} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionNEW_VERSION + --mdStoreID${mdStoreOutputId} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + yarn @@ -49,18 +72,63 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --mdstoreInputPath${mdstoreInputPath} - --mdstoreOutputPath${mdstoreOutputPath} + --mdstoreInputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdstoreOutputVersion${wf:actionData('BeginRead')['mdStoreReadLockVersion']} --dateOfTransformation${dateOfTransformation} --transformationPlugin${transformationPlugin} - --transformationRuleTitle${transformationRuleTitle} - - + --transformationRuleId${transformationRuleId} + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_UNLOCK + --mdStoreManagerURI${mdStoreManagerURI} + --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} + + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionCOMMIT + --namenode${nameNode} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_UNLOCK + --mdStoreManagerURI${mdStoreManagerURI} + --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} + + + + + + + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionROLLBACK + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json index cbd2f25abd..d92698de52 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json @@ -13,19 +13,19 @@ }, { "paramName": "i", - "paramLongName": "mdstoreInputPath", - "paramDescription": "the path of the sequencial file to read", + "paramLongName": "mdstoreInputVersion", + "paramDescription": "the mdStore Version bean of the Input", "paramRequired": true }, { "paramName": "o", - "paramLongName": "mdstoreOutputPath", - "paramDescription": "the path of the result DataFrame on HDFS", + "paramLongName": "mdstoreOutputVersion", + "paramDescription": "the mdStore Version bean of the Output", "paramRequired": true }, { "paramName": "tr", - "paramLongName": "transformationRuleTitle", + "paramLongName": "transformationRuleId", "paramDescription": "the transformation Rule to apply to the input MDStore", "paramRequired": true }, diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl index 9e5f84c117..becd3a05e3 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl @@ -9,7 +9,9 @@ - + + + diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml index 8efb3c4876..ebe8e919b4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml @@ -1,68 +1,32 @@ - - - - od______2294::00029b7f0a2a7e090e55b625a9079d83 - oai:pub.uni-bielefeld.de:2578942 - 2018-11-23T15:15:33.974+01:00 - od______2294 - oai:pub.uni-bielefeld.de:2578942 - 2018-07-24T13:01:16Z - conference - ddc:000 - conferenceFtxt - driver - open_access - - - - Mobile recommendation agents making online use of visual attention information at the point of sale - Pfeiffer, Thies - Pfeiffer, Jella - Meißner, Martin - Davis, Fred - Riedl, René - Jan, vom Brocke - Léger, Pierre-Majorique - Randolph, Adriane - Mobile Cognitive Assistance Systems - Information Systems - ddc:000 - We aim to utilize online information about visual attention for developing mobile recommendation agents (RAs) for use at the point of sale. Up to now, most RAs are focussed exclusively at personalization in an e-commerce setting. Very little is known, however, about mobile RAs that offer information and assistance at the point of sale based on individual-level feature based preference models (Murray and Häubl 2009). Current attempts provide information about products at the point of sale by manually scanning barcodes or using RFID (Kowatsch et al. 2011, Heijden 2005), e.g. using specific apps for smartphones. We argue that an online access to the current visual attention of the user offers a much larger potential. Integrating mobile eye tracking into ordinary glasses would yield a direct benefit of applying neuroscience methods in the user’s everyday life. First, learning from consumers’ attentional processes over time and adapting recommendations based on this learning allows us to provide very accurate and relevant recommendations, potentially increasing the perceived usefulness. Second, our proposed system needs little explicit user input (no scanning or navigation on screen) making it easy to use. Thus, instead of learning from click behaviour and past customer ratings, as it is the case in the e-commerce setting, the mobile RA learns from eye movements by participating online in every day decision processes. We argue that mobile RAs should be built based on current research in human judgment and decision making (Murray et al. 2010). In our project, we therefore follow a two-step approach: In the empirical basic research stream, we aim to understand the user’s interaction with the product shelf: the actions and patterns of user’s behaviour (eye movements, gestures, approaching a product closer) and their correspondence to the user’s informational needs. In the empirical system development stream, we create prototypes of mobile RAs and test experimentally the factors that influence the user’s adoption. For example, we suggest that a user’s involvement in the process, such as a need for exact nutritional information or for assistance (e.g., reading support for elderly) will influence the user’s intention to use such as system. The experiments are conducted both in our immersive virtual reality supermarket presented in a CAVE, where we can also easily display information to the user and track the eye movement in great accuracy, as well as in real-world supermarkets (see Figure 1), so that the findings can be better generalized to natural decision situations (Gidlöf et al. 2013). In a first pilot study with five randomly chosen participants in a supermarket, we evaluated which sort of mobile RAs consumers favour in order to get a first impression of the user’s acceptance of the technology. Figure 1 shows an excerpt of one consumer’s eye movements during a decision process. First results show long eye cascades and short fixations on many products in situations where users are uncertain and in need for support. Furthermore, we find a surprising acceptance of the technology itself throughout all ages (23 – 61 years). At the same time, consumers express serious fear of being manipulated by such a technology. For that reason, they strongly prefer the information to be provided by trusted third party or shared with family members and friends (see also Murray and Häubl 2009). Our pilot will be followed by a larger field experiment in March in order to learn more about factors that influence the user’s acceptance as well as the eye movement patterns that reflect typical phases of decision processes and indicate the need for support by a RA. - 2013 - info:eu-repo/semantics/conferenceObject - doc-type:conferenceObject - text - https://pub.uni-bielefeld.de/record/2578942 - https://pub.uni-bielefeld.de/download/2578942/2602478 - Pfeiffer T, Pfeiffer J, Meißner M. Mobile recommendation agents making online use of visual attention information at the point of sale. In: Davis F, Riedl R, Jan vom B, Léger P-M, Randolph A, eds. Proceedings of the Gmunden Retreat on NeuroIS 2013. 2013: 3-3. - eng - info:eu-repo/semantics/openAccess + +
+ oai:lib.psnc.pl:278 + 2011-08-25T15:17:13Z + PSNCRepository:PSNCExternalRepository:exhibitions + PSNCRepository:PSNCExternalRepository:Departments + PSNCRepository:PSNCExternalRepository:Departments:NetworkServices + PSNCRepository:PSNCExternalRepository + PSNCRepository:PSNCExternalRepository:publications + PSNCRepository +
+ + + + + + + + + + + + + + + + - - - - http://pub.uni-bielefeld.de/oai - oai:pub.uni-bielefeld.de:2578942 - 2018-07-24T13:01:16Z - http://www.openarchives.org/OAI/2.0/oai_dc/ - - - - false - false - 0.9 - - - - -
+ \ No newline at end of file From bead34d11a889b716a256fb0b763995d2c220f0f Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 1 Feb 2021 14:58:06 +0100 Subject: [PATCH 06/17] code refactor --- .../GenerateNativeStoreSparkJob.java | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index 466ddcd21f..553a3dc5fb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -157,8 +157,9 @@ public class GenerateNativeStoreSparkJob { final Encoder encoder = Encoders.bean(MetadataRecord.class); - Dataset mdstore = spark.createDataset(nativeStore.rdd(), encoder); + final Dataset mdstore = spark.createDataset(nativeStore.rdd(), encoder); + final String targetPath = currentVersion.getHdfsPath() + DATASET_NAME; if (readMdStoreVersion != null) { // INCREMENTAL MODE @@ -168,28 +169,35 @@ public class GenerateNativeStoreSparkJob { .as(encoder); TypedColumn aggregator = new MDStoreAggregator().toColumn(); - mdstore = currentMdStoreVersion - .union(mdstore) - .groupByKey( - (MapFunction) MetadataRecord::getId, - Encoders.STRING()) - .agg(aggregator) - .map((MapFunction, MetadataRecord>) Tuple2::_2, encoder); + saveDataset( + currentMdStoreVersion + .union(mdstore) + .groupByKey( + (MapFunction) MetadataRecord::getId, + Encoders.STRING()) + .agg(aggregator) + .map((MapFunction, MetadataRecord>) Tuple2::_2, encoder), + targetPath); + } else { + saveDataset(mdstore, targetPath); } - mdstore - .write() - .mode(SaveMode.Overwrite) - .format("parquet") - .save(currentVersion.getHdfsPath() + DATASET_NAME); - mdstore = spark.read().load(currentVersion.getHdfsPath() + DATASET_NAME).as(encoder); - final Long total = mdstore.count(); + final Long total = spark.read().load(targetPath).count(); AggregationUtility.writeTotalSizeOnHDFS(spark, total, currentVersion.getHdfsPath() + "/size"); }); } + private static void saveDataset(final Dataset currentMdStore, final String targetPath) { + currentMdStore + .write() + .mode(SaveMode.Overwrite) + .format("parquet") + .save(targetPath); + + } + public static MetadataRecord parseRecord( final String input, final String xpath, From 8eaa1fd4b411c6757bc75b9f38a155b106e97a29 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 1 Feb 2021 19:29:10 +0100 Subject: [PATCH 07/17] WIP: metadata collection in INCREMENTAL mode and relative test --- .../dhp/model/mdstore/MetadataRecord.java | 16 +- .../common/AggregationUtility.java | 33 ++- .../GenerateNativeStoreSparkJob.java | 259 +++++++++--------- .../worker/CollectorWorkerApplication.java | 10 +- .../GenerateNativeStoreSparkJobTest.java | 169 ++++++++++++ .../eu/dnetlib/dhp/collection/input.json | 9 - .../dhp/collection/mdStoreVersion_1.json | 9 + .../dhp/collection/mdStoreVersion_2.json | 9 + .../eu/dnetlib/dhp/collection/provenance.json | 5 + .../eu/dnetlib/dhp/collection/sequence_file | Bin 0 -> 52308 bytes 10 files changed, 360 insertions(+), 159 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/input.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreVersion_1.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreVersion_2.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/provenance.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/sequence_file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java b/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java index ce65e710f9..0b59dcce07 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java @@ -26,13 +26,13 @@ public class MetadataRecord implements Serializable { private String body; /** the date when the record has been stored */ - private long dateOfCollection; + private Long dateOfCollection; /** the date when the record has been stored */ - private long dateOfTransformation; + private Long dateOfTransformation; public MetadataRecord() { - this.dateOfCollection = System.currentTimeMillis(); + } public MetadataRecord( @@ -40,7 +40,7 @@ public class MetadataRecord implements Serializable { String encoding, Provenance provenance, String body, - long dateOfCollection) { + Long dateOfCollection) { this.originalId = originalId; this.encoding = encoding; @@ -90,19 +90,19 @@ public class MetadataRecord implements Serializable { this.body = body; } - public long getDateOfCollection() { + public Long getDateOfCollection() { return dateOfCollection; } - public void setDateOfCollection(long dateOfCollection) { + public void setDateOfCollection(Long dateOfCollection) { this.dateOfCollection = dateOfCollection; } - public long getDateOfTransformation() { + public Long getDateOfTransformation() { return dateOfTransformation; } - public void setDateOfTransformation(long dateOfTransformation) { + public void setDateOfTransformation(Long dateOfTransformation) { this.dateOfTransformation = dateOfTransformation; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java index 1f5ed27cb1..eb971c4754 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java @@ -8,21 +8,38 @@ import java.nio.charset.StandardCharsets; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob; +import eu.dnetlib.dhp.model.mdstore.MetadataRecord; public class AggregationUtility { + private static final Logger log = LoggerFactory.getLogger(AggregationUtility.class); + public static void writeTotalSizeOnHDFS(final SparkSession spark, final Long total, final String path) throws IOException { - FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration()); + log.info("writing size ({}) info file {}", total, path); + try (FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration()); + BufferedOutputStream os = new BufferedOutputStream(fs.create(new Path(path)))) { + os.write(total.toString().getBytes(StandardCharsets.UTF_8)); + os.flush(); + } - FSDataOutputStream output = fs.create(new Path(path)); - - final BufferedOutputStream os = new BufferedOutputStream(output); - - os.write(total.toString().getBytes(StandardCharsets.UTF_8)); - - os.close(); } + + public static void saveDataset(final Dataset mdstore, final String targetPath) { + log.info("saving dataset in: {}", targetPath); + mdstore + .write() + .mode(SaveMode.Overwrite) + .format("parquet") + .save(targetPath); + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index 553a3dc5fb..bbed36a9c3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -1,23 +1,20 @@ package eu.dnetlib.dhp.collection; +import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.*; import java.nio.charset.StandardCharsets; -import java.util.Collections; +import java.util.List; import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; @@ -30,31 +27,155 @@ import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; -import eu.dnetlib.dhp.aggregation.common.AggregationUtility; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.collection.worker.CollectorWorkerApplication; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.Provenance; +import net.sf.saxon.expr.Component; import scala.Tuple2; public class GenerateNativeStoreSparkJob { private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class); + + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final String DATASET_NAME = "/store"; + public static void main(String[] args) throws Exception { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GenerateNativeStoreSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/collection/collection_input_parameters.json"))); + parser.parseArgument(args); + + final String provenanceArgument = parser.get("provenance"); + log.info("Provenance is {}", provenanceArgument); + final Provenance provenance = MAPPER.readValue(provenanceArgument, Provenance.class); + + final String dateOfCollectionArgs = parser.get("dateOfCollection"); + log.info("dateOfCollection is {}", dateOfCollectionArgs); + final Long dateOfCollection = new Long(dateOfCollectionArgs); + + String mdStoreVersion = parser.get("mdStoreVersion"); + log.info("mdStoreVersion is {}", mdStoreVersion); + + final MDStoreVersion currentVersion = MAPPER.readValue(mdStoreVersion, MDStoreVersion.class); + + String readMdStoreVersionParam = parser.get("readMdStoreVersion"); + log.info("readMdStoreVersion is {}", readMdStoreVersionParam); + + final MDStoreVersion readMdStoreVersion = StringUtils.isBlank(readMdStoreVersionParam) ? null + : MAPPER.readValue(readMdStoreVersionParam, MDStoreVersion.class); + + final String xpath = parser.get("xpath"); + log.info("xpath is {}", xpath); + + final String encoding = parser.get("encoding"); + log.info("encoding is {}", encoding); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + SparkConf conf = new SparkConf(); + /* + * conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf .registerKryoClasses( new + * Class[] { MetadataRecord.class, Provenance.class }); + */ + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> createNativeMDStore( + spark, provenance, dateOfCollection, xpath, encoding, currentVersion, readMdStoreVersion)); + } + + private static void createNativeMDStore(SparkSession spark, + Provenance provenance, + Long dateOfCollection, + String xpath, + String encoding, + MDStoreVersion currentVersion, + MDStoreVersion readVersion) throws IOException { + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems"); + final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords"); + + final String seqFilePath = currentVersion.getHdfsPath() + CollectorWorkerApplication.SEQUENCE_FILE_NAME; + final JavaRDD nativeStore = sc + .sequenceFile(seqFilePath, IntWritable.class, Text.class) + .map( + item -> parseRecord( + item._2().toString(), + xpath, + encoding, + provenance, + dateOfCollection, + totalItems, + invalidRecords)) + .filter(Objects::nonNull) + .distinct(); + + final Encoder encoder = Encoders.bean(MetadataRecord.class); + final Dataset mdstore = spark.createDataset(nativeStore.rdd(), encoder); + + final String targetPath = currentVersion.getHdfsPath() + DATASET_NAME; + + if (readVersion != null) { // INCREMENTAL MODE + log.info("updating {} incrementally with {}", targetPath, readVersion.getHdfsPath()); + Dataset currentMdStoreVersion = spark + .read() + .load(readVersion.getHdfsPath() + DATASET_NAME) + .as(encoder); + TypedColumn aggregator = new MDStoreAggregator().toColumn(); + + final Dataset map = currentMdStoreVersion + .union(mdstore) + .groupByKey( + (MapFunction) MetadataRecord::getId, + Encoders.STRING()) + .agg(aggregator) + .map((MapFunction, MetadataRecord>) Tuple2::_2, encoder); + + map.select("id").takeAsList(100).forEach(s -> log.info(s.toString())); + + saveDataset(map, targetPath); + + } else { + saveDataset(mdstore, targetPath); + } + + final Long total = spark.read().load(targetPath).count(); + log.info("collected {} records for datasource '{}'", total, provenance.getDatasourceName()); + + writeTotalSizeOnHDFS(spark, total, currentVersion.getHdfsPath() + "/size"); + } + public static class MDStoreAggregator extends Aggregator { @Override public MetadataRecord zero() { - return new MetadataRecord(); + return null; } @Override public MetadataRecord reduce(MetadataRecord b, MetadataRecord a) { + return getLatestRecord(b, a); + } + @Override + public MetadataRecord merge(MetadataRecord b, MetadataRecord a) { return getLatestRecord(b, a); } @@ -68,136 +189,22 @@ public class GenerateNativeStoreSparkJob { } @Override - public MetadataRecord merge(MetadataRecord b, MetadataRecord a) { - return getLatestRecord(b, a); - } - - @Override - public MetadataRecord finish(MetadataRecord j) { - return j; + public MetadataRecord finish(MetadataRecord r) { + return r; } @Override public Encoder bufferEncoder() { - return Encoders.kryo(MetadataRecord.class); + return Encoders.bean(MetadataRecord.class); } @Override public Encoder outputEncoder() { - return Encoders.kryo(MetadataRecord.class); + return Encoders.bean(MetadataRecord.class); } } - public static void main(String[] args) throws Exception { - - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GenerateNativeStoreSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/collection/collection_input_parameters.json"))); - parser.parseArgument(args); - final ObjectMapper jsonMapper = new ObjectMapper(); - final String provenanceArgument = parser.get("provenance"); - log.info("Provenance is {}", provenanceArgument); - final Provenance provenance = jsonMapper.readValue(provenanceArgument, Provenance.class); - - final String dateOfCollectionArgs = parser.get("dateOfCollection"); - log.info("dateOfCollection is {}", dateOfCollectionArgs); - final long dateOfCollection = new Long(dateOfCollectionArgs); - - String mdStoreVersion = parser.get("mdStoreVersion"); - log.info("mdStoreVersion is {}", mdStoreVersion); - - final MDStoreVersion currentVersion = jsonMapper.readValue(mdStoreVersion, MDStoreVersion.class); - - String readMdStoreVersionParam = parser.get("readMdStoreVersion"); - log.info("readMdStoreVersion is {}", readMdStoreVersionParam); - - final MDStoreVersion readMdStoreVersion = StringUtils.isBlank(readMdStoreVersionParam) ? null - : jsonMapper.readValue(readMdStoreVersionParam, MDStoreVersion.class); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - SparkConf conf = new SparkConf(); - conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(Collections.singleton(MetadataRecord.class).toArray(new Class[] {})); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - final JavaPairRDD inputRDD = sc - .sequenceFile( - currentVersion.getHdfsPath() + CollectorWorkerApplication.SEQUENTIAL_FILE_NAME, - IntWritable.class, Text.class); - - final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems"); - final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords"); - - final JavaRDD nativeStore = inputRDD - .map( - item -> parseRecord( - item._2().toString(), - parser.get("xpath"), - parser.get("encoding"), - provenance, - dateOfCollection, - totalItems, - invalidRecords)) - .filter(Objects::nonNull) - .distinct(); - - final Encoder encoder = Encoders.bean(MetadataRecord.class); - - final Dataset mdstore = spark.createDataset(nativeStore.rdd(), encoder); - - final String targetPath = currentVersion.getHdfsPath() + DATASET_NAME; - if (readMdStoreVersion != null) { - // INCREMENTAL MODE - - Dataset currentMdStoreVersion = spark - .read() - .load(readMdStoreVersion.getHdfsPath() + DATASET_NAME) - .as(encoder); - TypedColumn aggregator = new MDStoreAggregator().toColumn(); - - saveDataset( - currentMdStoreVersion - .union(mdstore) - .groupByKey( - (MapFunction) MetadataRecord::getId, - Encoders.STRING()) - .agg(aggregator) - .map((MapFunction, MetadataRecord>) Tuple2::_2, encoder), - targetPath); - - } else { - saveDataset(mdstore, targetPath); - } - - final Long total = spark.read().load(targetPath).count(); - - AggregationUtility.writeTotalSizeOnHDFS(spark, total, currentVersion.getHdfsPath() + "/size"); - }); - } - - private static void saveDataset(final Dataset currentMdStore, final String targetPath) { - currentMdStore - .write() - .mode(SaveMode.Overwrite) - .format("parquet") - .save(targetPath); - - } - public static MetadataRecord parseRecord( final String input, final String xpath, @@ -219,7 +226,7 @@ public class GenerateNativeStoreSparkJob { invalidRecords.add(1); return null; } - return new MetadataRecord(originalIdentifier, encoding, provenance, input, dateOfCollection); + return new MetadataRecord(originalIdentifier, encoding, provenance, document.asXML(), dateOfCollection); } catch (Throwable e) { invalidRecords.add(1); return null; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java index 29ae98c5bd..e24b9ad1da 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java @@ -1,11 +1,6 @@ package eu.dnetlib.dhp.collection.worker; -import java.io.File; -import java.io.FileOutputStream; -import java.io.OutputStream; -import java.util.Properties; - import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -16,7 +11,6 @@ import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; -import eu.dnetlib.dhp.common.rest.DNetRestClient; /** * DnetCollectortWorkerApplication is the main class responsible to start the Dnet Collection into HDFS. This module @@ -31,7 +25,7 @@ public class CollectorWorkerApplication { private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory(); - public static String SEQUENTIAL_FILE_NAME = "/sequence_file"; + public static String SEQUENCE_FILE_NAME = "/sequence_file"; /** * @param args @@ -61,7 +55,7 @@ public class CollectorWorkerApplication { final ApiDescriptor api = jsonMapper.readValue(apiDescriptor, ApiDescriptor.class); final CollectorWorker worker = new CollectorWorker(collectorPluginFactory, api, hdfsuri, - currentVersion.getHdfsPath() + SEQUENTIAL_FILE_NAME); + currentVersion.getHdfsPath() + SEQUENCE_FILE_NAME); worker.collect(); } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java new file mode 100644 index 0000000000..715ad8fa6f --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java @@ -0,0 +1,169 @@ + +package eu.dnetlib.dhp.collection; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoder; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; +import eu.dnetlib.dhp.model.mdstore.MetadataRecord; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class GenerateNativeStoreSparkJobTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static Encoder encoder; + + private static final String encoding = "XML"; + private static final String dateOfCollection = System.currentTimeMillis() + ""; + private static final String xpath = "//*[local-name()='header']/*[local-name()='identifier']"; + private static String provenance; + + private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJobTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + provenance = IOUtils.toString(GenerateNativeStoreSparkJobTest.class.getResourceAsStream("provenance.json")); + workingDir = Files.createTempDirectory(GenerateNativeStoreSparkJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + + conf.setAppName(GenerateNativeStoreSparkJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + encoder = Encoders.bean(MetadataRecord.class); + spark = SparkSession + .builder() + .appName(GenerateNativeStoreSparkJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + @Order(1) + public void testGenerateNativeStoreSparkJobRefresh() throws Exception { + + MDStoreVersion mdStoreV1 = prepareVersion("mdStoreVersion_1.json"); + FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); + + IOUtils + .copy( + getClass().getResourceAsStream("sequence_file"), + new FileOutputStream(mdStoreV1.getHdfsPath() + "/sequence_file")); + + GenerateNativeStoreSparkJob + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-encoding", encoding, + "-dateOfCollection", dateOfCollection, + "-provenance", provenance, + "-xpath", xpath, + "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), + "-readMdStoreVersion", "", + "-workflowId", "abc" + }); + + verify(mdStoreV1); + } + + @Test + @Order(2) + public void testGenerateNativeStoreSparkJobIncremental() throws Exception { + + MDStoreVersion mdStoreV2 = prepareVersion("mdStoreVersion_2.json"); + FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); + + IOUtils + .copy( + getClass().getResourceAsStream("sequence_file"), + new FileOutputStream(mdStoreV2.getHdfsPath() + "/sequence_file")); + + MDStoreVersion mdStoreV1 = prepareVersion("mdStoreVersion_1.json"); + + GenerateNativeStoreSparkJob + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-encoding", encoding, + "-dateOfCollection", dateOfCollection, + "-provenance", provenance, + "-xpath", xpath, + "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2), + "-readMdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), + "-workflowId", "abc" + }); + + verify(mdStoreV2); + } + + protected void verify(MDStoreVersion mdStoreVersion) throws IOException { + Assertions.assertTrue(new File(mdStoreVersion.getHdfsPath()).exists()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + long seqFileSize = sc + .sequenceFile(mdStoreVersion.getHdfsPath() + "/sequence_file", IntWritable.class, Text.class) + .count(); + + final Dataset mdstore = spark.read().load(mdStoreVersion.getHdfsPath() + "/store").as(encoder); + long mdStoreSize = mdstore.count(); + + long declaredSize = Long.parseLong(IOUtils.toString(new FileReader(mdStoreVersion.getHdfsPath() + "/size"))); + + Assertions.assertEquals(seqFileSize, declaredSize, "the size must be equal"); + Assertions.assertEquals(seqFileSize, mdStoreSize, "the size must be equal"); + + long uniqueIds = mdstore + .map((MapFunction) MetadataRecord::getId, Encoders.STRING()) + .distinct() + .count(); + + Assertions.assertEquals(seqFileSize, uniqueIds, "the size must be equal"); + } + + private MDStoreVersion prepareVersion(String filename) throws IOException { + MDStoreVersion mdstore = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResource(filename)), MDStoreVersion.class); + mdstore.setHdfsPath(String.format(mdstore.getHdfsPath(), workingDir.toString())); + return mdstore; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/input.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/input.json deleted file mode 100644 index 4ffc33d247..0000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/input.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "id": "md-7557225f-77cc-407d-bdf4-d2fe03131464-1611935085410", - "mdstore": "md-7557225f-77cc-407d-bdf4-d2fe03131464", - "writing": true, - "readCount": 0, - "lastUpdate": null, - "size": 0, - "hdfsPath": "/data/dnet.dev/mdstore/md-7557225f-77cc-407d-bdf4-d2fe03131464/md-7557225f-77cc-407d-bdf4-d2fe03131464-1611935085410" -} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreVersion_1.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreVersion_1.json new file mode 100644 index 0000000000..8945c3d881 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreVersion_1.json @@ -0,0 +1,9 @@ +{ + "id":"md-84e86d00-5771-4ed9-b17f-177ef4b46e42-1612187678801", + "mdstore":"md-84e86d00-5771-4ed9-b17f-177ef4b46e42", + "writing":true, + "readCount":0, + "lastUpdate":null, + "size":0, + "hdfsPath":"%s/mdstore/md-84e86d00-5771-4ed9-b17f-177ef4b46e42/v1" +} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreVersion_2.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreVersion_2.json new file mode 100644 index 0000000000..c3d4617cb8 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreVersion_2.json @@ -0,0 +1,9 @@ +{ + "id":"md-84e86d00-5771-4ed9-b17f-177ef4b46e42-1612187459108", + "mdstore":"md-84e86d00-5771-4ed9-b17f-177ef4b46e42", + "writing":false, + "readCount":1, + "lastUpdate":1612187563099, + "size":71, + "hdfsPath":"%s/mdstore/md-84e86d00-5771-4ed9-b17f-177ef4b46e42/v2" +} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/provenance.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/provenance.json new file mode 100644 index 0000000000..2cf0dab706 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/provenance.json @@ -0,0 +1,5 @@ +{ + "datasourceId":"74912366-d6df-49c1-a1fd-8a52fa98ce5f_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU\u003d", + "datasourceName":"PSNC Institutional Repository", + "nsPrefix":"psnc______pl" +} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/sequence_file b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/sequence_file new file mode 100644 index 0000000000000000000000000000000000000000..309645a5f2526ed6da0127e67a0664f7b79d96ff GIT binary patch literal 52308 zcmZ^qV{{#G)9o8Kwr$&PY}>Z&q_J(=wrv}YZJX^qeV(qn?ppVK&&QneEx(!no;`b% zL=?aY>>SPL4D1bz%uVRb4UFyV?CC7*=p<~N)f_FH4GgVKF#pFND4V!D1AU|VAHOrQ zv$1zHadM&)HZe7Dv33@+Gd3~$2KZEO&2&XkDHUiFo?j6_Dh_=62K)=~XVAajxn~up zY}u|epp4qB@`;QL*GNjQH)|F41(~zUT%=0Um#8coXMwa~Y&2wpyN7kpH;JuNUmivl zqsE&@w)M@t{h0}$4SjnuMZnab7}_?ZE4peanK_G#W0CPG7El~4`2?z}oeI)^?oIIN z71He9$~K2$0T1kTCBh4I_MUPmr+^2;e_NM4wn79K8$~SV-PyXkMIK)AcX6)q`p70Q zcjIe$fwv17WZ?L|ga0RN?9a)Vm>0Bdng3Hw2zsE^+=OH46w_2nDKvzrqQV9rkW6%D zp%QkyjMhSD3e zsliml!uQB0S5(Ed1XarFac4te+lW|!AHH=Yw}H)91NUNl3xS*7y^D}zFAt4`nFCoZ z?L+r*F!ClI<`2`%Dp|vgf{!h&Pv~qT3$eHIIBviVKZOL2iZ&JsmLs>f;bh<0zcyZ@ zVqu=vw;^-w#e!37rx!k~PjMQcIyx^5qEpiX#$e>7<&YmDB`zbXQ~dsXj?`w~@!mv~ z$kR}3kFP?xY<#nJ5_}qZPd%N}1}4i=XBwj5z=4!r7%Y!1W3{1*n5IS8CMHqcm{roUKh*m&r7?zMykUU^n<;r)=HfFz8=mwvedFSriHxTlF zfCB*lSN$Wq!GQ3)lP7qU{~0|xlZh=%u(X9>2ppzbT$&(A`((XC#%R*@`<2jTbP4Z* zu0g6EskAYbyoe?4&db(YC%2v-o2?J5_W4g9nt?GKmeLif*aZ>^oN@xg47p)m46$>_ zuNCfm+)|m$t?CedFtnc|da!z+2pWuNZAsGZ`52rN!kY^DC%%>+O%1)v1h01>rv2(I z@>{YSE?QUJs%9?>TTcsSf7VN5KgMF8!7t^3ZggR0A$9~Lh^v;Q=ymXT)#nhQscYc{ z7;Vr`mtLEK5~d!s+Pa+T!j~QhPrds0rZ??kWQGgn?M;KD*x--#gmP7x+P_W8d!>+J zD0!gU)9-p%xW9QeLXF#4H2zf8r&=*Ejo3gCB%^V`b+h@)8|}a z^8a;ZgN|Kro8EbXg>l?K_w#_i*Ke<|@Q95QSu?|XIw7H=&raBt?il!Ki%d^l*i#`- zEtar5=tOwPr(UJXphpJ0&d~D>DDh=D{T%h?rvCy7kv|#UoTP6$ZLJP73le=~Px48_Pf)9ZV6H?~5yi z{)b1jFTbQsnBw9^0zfK_-01^Aveb)H&o?U`9;f=eAiLUpDjd*&6pRbRz7l_LsrcEa z?qJJ`1b9Ak(aMEpTU59c^EuO%IP1(*2TPUCqUH@sALVtvAPi$j%w(*N{3m?pxVQVWqwGUL?dp*4P z+tHC=L0Ni~q6agR>|MM>p*_w>lT%Np&@DRjalc1O`k606$e=#cCVv}Du805zu5^vz z`v=aO!j69~_U>Zj+uI#C!@5MR4d(LxBsaWjebd7`C`==tP$69jZBi8te-Jengr0fk zRKIUL4&Kz4{*JV7qU221lBJi~Z`CLZ@k}Vno(^SJZ{mg?_Ac z|MblgY89Rk6jlG?J&MiKZO*sQ=>0QEAO7!x?|YZiS@eWPLgjTlC=kM&nC;7x>>c65 z{O#f`tGHX7{>Au4@?DRVOK@V8O08lbiEt%%2eG5%jg!AdF-;{@qi7tHZ)dSci~W56 zg!s`(C~T{jNcNDVqs2)Qg;QsTR?Sn04_hzuL`G1Kk3|pBSiwV1lI$G~oEJP1=gEfx7Q(4DqpGUQy`?>2L3}a)Zj`2E% z2&A6-g>47Q{1W;u>)1~nB9IQ5vL#h#$}CPUmPSjtof?2D>4-yrC|J4x zqDPf3zOa{a+aB73m*O+E#&&?FdQm;u%F_Kk%he=-U6oxV*A-)6*{ekIM7K!F&I zwhM?AG+U?6f0_2Q4uU%}5DJB+Q1HQ(jMq&{2gm9tes^pGCr8{N=%cuKVT;KpLhVRR_aNUnPA zqRExO1wj_Eu=(j30ScldxoLnj7?Jo;Q(^TXZZ_y|33}t`hZTRZUIyUM6@dzf?wT8X z7r9u^QMKQlc^43)rTl`=_OkCmt*0YcZ@Z4(jPDVe8hc6OfeW8?&gLW}(W_=<{cTBE z>v7A#4Q>~4P?PIcmVqVcWEuiz7|7bBDQ~!bfjXLZxv`g5P}>)-KF?pD4{5WE!Ni2+ zyd+|V8i;Glld}~Qf7O%gsA~#a!3sPjrEDAM{?@KJCqGY?;j`ZU+uRI-j+X(y|0U@287)Q8rxmcRKupI`%sphlz*b^%7Ce8a7GcL z;?0_gns`vz+Jdu?U5HoYwF%@w*+F06LGVRtdZZmLEFLM+h@5QeKqCDCQkj@79qEz~ zHMToRLc8zk*|4mrWMwYGoI&Hr<6{e}lKochOVd=yI@r3AAA+_4E3`+w~s^K9wjv14sGvptZ0ONE#!kL_TFn&`|N3tPvF^yg{^9HhqMSuwrKg6l{>CEd>=7vUQGE&B4_4nakCf=VG(^E1UInGBszx%$6L}@pPw@^u@8V zrTY{gbI@Q%RDGj05E>m>0+{%$1tn?DuFGLFG_C3b_Zh#hVM=+g*4ASW{uz&lb-VRH zc{_YK8jhkR%=gLJcQ1m8GjZ2aCh~3Eo$4i)9btclF59sMr2`dROg*k5=HQC^ zP=bouQMbl~HIY&zFmdJTZOkaKwwFrE@x9gLzR`?{68b3gH0yod3}W_Lsi?zw~vh1k~DdKB9E7StN}y)hI3q6~`4|l&ch$X|8}+ zt9z++O2K#w^Ov?(k`o~P*#K*_d|QtwLkZP{Rba5wf;Os59Ve-LWoxRssN&|y2czPYV)XC zURetz87pCV*$mqQqh>$sn)bN}%zHdDhvvuW`jR6NM`6n3<*i{3u#jS=F;t4-jRm7V z&QWU+7$kKyV$z)f9A$k`W8sRqW%)2##3+IOhWdWzZ@f;X&s?0hGqmjoo?LHC-BD{e z%By>rt9ZsADSQ6E__|U|=yjC~oH$l=(m|{74V7qGfg=Jy1R6DZ3x&y{^mbrt=tfiB zS{)Pr{C-02g`WvJ2rCK>9$ZnPePGSxZf8yyU})=&ojc>MS4_3Y<0v9KBHW^3rIF17 zvM$qA1Lj^4Kq9U{Z>WmN;V?>9Y<$Ddp3MV;f~nWz@lwre8^Q6Uzx zSj$tR@XGt>A?zZNEtV~}DNrn~uo6^esy-zxM=7%@^05ina?!IWT_OCm{7VL!@F zpY$eRjs*6pOuY>u_u2GRNqN7vns}UA0MK``>zUmd@SO@Vt8My+-O3F$y{lLthpSb! zdq%POe!@&fD4~c*bsnig;M#Nh+my$#6?0f>!fEPUTy$= z_iaVZRMr+8f2@3mIJ<2t0__JUhPRbSfYAAn^sb@;q9o%}vrKxe35Ir-Sdr_tt;cI5 zhl@V0jI;OpMfcV7OlSWu5_eqi(pT^Bu^p$wQFmzDD*xfugNr?}xe2J!DK46n8#Op- zNfVn7Bo{vtLa)TIqmz7Xmz@KowXoIno*n3!X}4z_HWV*Mq^WI*FP=nGroO8;3{ z#!HJ8RsA`kF{WV=U8jwI9LJR6>^g@fZ6<0fbcaD=9Zc$ve`%zq`k6?~zY$U>2Ym10 z%xVfPZmw2j9-9PIQ1$)F^+7UOlc!Sn=2qR$gj17WP$A16DI!2fV$q|w(1bijcN>-- zBC&s~uUz4=f*|IRY0Xw?ba*Wr@ z3!`q3P7M<+^N;#anQ$@ZL`?M@ccC@;#+vW0Wq}m7L=v?J6B_JIf)O7+t?M^x>_= zjQP{|*=j7AE7MGmPH29EXea9={4aX4?;oBJ|G^VrR-pJ~?NT@JU4{?Af@u~sDXm@s zL1Jjer-jTJXKjo~8EUl%t@{rBHedg3-dmShayB4b#8kjH^vrp(z3Jui{h!u@JgsXIZ6BRBdr` zoLqjvp8AQ2A{8GEttrpK=!2J{!X`vOM%=QZG3FDgdKbx>(1i)?s}HkF{bRVj{FCn} zN|SE*di)a_Mxqe#?~Z6LH3_Uw)+0s}uEL7W)4X9LL``vKTLPCk;?e=&si=;g@|x2- zOb6qKQg~Xu$3f2b=Iy{|!f3p71UxFl@V7N;*yT)Cpv_9o|H8eXl@2PzH&mc$rA&f^ zphSHz5G&4Bczt)h_x0xO!PQD12hQ63{QU;v_m;&Stki9VIUbNn;?|^1NjohWK^mFu z6;qvrh7?L`ris?(=Egn{Sr@JnY=TAcTt|BgrgMaq0AdDAU6I<{7t)+Kgt{DFrC1Cx z(>ztC@fAA*dx`=^SC^&DLs^HXi!9_t`V($hKufgYRzMZWB z^Q?>@XmXfJycXJ~$`nSe!cW($7Rel$9UZRHFMsaScAr0GC(kjmTLPK7lyQq5R&vSG zR+g~UVOR?;t?SfF_BVa_*34^2FG3B?yYqZCSjXDyu1~$Z?;zggja+~>JN}R3?*NWf z0qz#SMZooms6y`~a#$Sr%dr|6?0-2%aJ*95Uk<0;Ahf|Zv}3u7e}-~OD1kIKU>A>A_;|6K<^1kYY z=km4ZdBrR6J4xA@uwqbxGECXGrje8p%_ffMOHyE8$@>Qz8|fx5pXJ1VwAH&aRyfYC`)e-@8WM==eo zW1SPZfKJae|4U~@%hFPbze$#H%6<$MjzbnN#AbY>@b>EJ&>4Vn@riXw-2qpE5i>f~ z2psARO)-%cC$p&$84L|hLk4xHP(f#B{PZiV#kd@bK6VuuEV_F(l{yX=vbs@o#do%{ zY>JvX%`kxJ_6r@KYn5ok^9w6ywogN%5dCtlz3#WsxK)_#4~H)JehbNQEUnTg%9vbI zigOXMxl5FKS$mJn2>r-k@%yGC4k-+V+`&$y9Lzrof@sr}4q9>o@L{3uXI6)7tn&2w z9ouE-YK(~oT56Vwd^`)adxX*2EwJhpRky$-xN3Jv%Sg&tpJ{1o$!(x3*uW5uJVXen z#FzE?PQ{Ip<1wTdSg3;CqTsIT%9@tML=#KQOsA!GGY4un+8MuVkO>o_y_isXHD9nZ zv%lS0zuzrzp22Sl%wBzn@wU1-@55!hItHNdvxq7~=?YNKBbEI`ueW@bv(W|&&7}pu zfVBwMpFb5QYb+NDa*>Z>2Ttklla{&xKpX!98uGuIbrGOh30d_F9Cw#I50WBQ9HT0d zKoqs8kRXLs^2a?Bsv5R+u58aq@cZ!z`NH+|aYp;|G@+L6`!9TMrrk}S-!`D7X&h}D z^^>KJz_;38$V{s!BIL1^_20C43ZR>_>)}&xH25#;ZPz&^kH2GTdU8n$hfGA4=M{q^ zN-;u4EAg-}#msm?T3x!u0Cyz7@NmJmOMG-waGho6KF*HJYv4TXFGkWsess&d2f3Y! zfqQteci{R87iQnC*Bm&$*&fYP#YL~GnfJYC1#|>0FX>~yacbVtz6)*)f4?f+eYL(a zJ26c`rsQzXU)~%4gq2n=Oyu;;TNnDJZ+WyDHHuj)N?fxNDkC0jvW43$z$``GD!(oo zzA?~15%s*5jO;f=A=`TETZ>*l%f3p?H;N-0RB$Jc6MiIo*+O6Iq7vyR$;g3&U85~p zm@BW)b&E6^WCUr+c8c{&CNcG8uyXS5=qoM6I8aib0da}o$^r5AY3bZ>`oup#9~gUm zY2-XTSQ|YL6!cefn!3BYxW&60P=)#?k^I4W!B(HCg@XeQj01?P4)mougv6%5&S@@x z&m!z-zxv58+n>S*$Z8UQ?23GOIjE$Qj%XQNUnRisUbA4eIFT3Fu1ixn^b$<$J)C8k z8EAKerId?n+FJ;41}eG-NUKTB~&4`oy|!xguOmR-B^8l=G8> z-@x|JObY*tFsT2>AJkv|{yVW(4X9aroJs|tFAB@h9z?_`3zDJ-i&tZbESvK7p*41{ z>lp45zl*8xm-o&wj?`ceBU!}ZW883>i87mw^6PE|fwwU+YRn*8bWWE$apkedD9KsG zP2HBef4iV$&QI+Ix^lN!kHYfWLs5qT|8o^g1b#N8u3wh+9m(Owg1Wslv5)^*NR!j< z67e&b(McaS^P+6=`AhYqZJD#!_sad)i#=#>-5l%|m64mycCBZr72mzFTW> zOL&7P)`2+2d?DTYfri8}VOB29$d|Ck17CWM`KOf=Ea;Vj>^<((91=QUhNYML@e;#Ja!Vj1aUM5EGQZ$faZSm;TF z0_&Ke-HHzBZ03rjoTF7wJ`TP6!1;KgSGd?h7nBK08Qr9gY|}??RXi?NvYa^RR-%Qh z!kr{07%>nuZaBFzaZoBds@BBKqggBR>nEc$0cRk?l4Sl`eyMCw0QvK!K&XU-O-Q?H zxYSqHz|g3Y;U`td&zBM02WXs||Grq%1JEwMzs=rXJ7sfgSv6{W%FMMn0?e)5`+Es)wW`rhQ ztdgx#!7Fya;Z_tFV#y6NV~ANY@?5@N3{fgwRy7&T{RaKxpcnT)`!^)_zxy|L$E5JK zLcv*Hi`S;xt_Gpo6NssRdg~I+=#Gn(bMoeqy=t%1Js11i;ZnbxLcVqEIw7zw7hW7^ zyC45uz;QmKp*ar?3|ukGOK)IuvGPsJH#~o$(b|1uOY!6a{i5OM()gIJFMm^_;Aj*S z1?-n=^W~tEv;0lcyi!mwq~2I}b^8`qZWkV0aTC@T&cSK*Eo%mwDfvPWR>F&G4vt%( ziGSZC-qcQGKXpjm(9?%_12d~4BesYQ0Z|h3H1>E9E^2pC2{&|CH!=XV^u$Wk4}`>l zUIx=(u~$-6wS2Vp{WCU&_SpBLL=yema zJe0=zxHy6DLV+ZSXB&x;lFoI^I%y-ol4{p9q!fV~j*X?wN|TYd@YV(qifs5!|I4z* zphK(!uvANjv@WYAx&9FfSSJ|v2tV=zqa!p@#MP%4NPFQxGE`D}DOhZsm;xX5GuWuo z%$(t@CE&Z(jKAsBg36^RV20axWEq5SmGGuiB#B#cXO|XFQincMawd3=SW{Liv)w9# z6~f5nE9cc&Inh`+1oG;tPqFj`nzF?4WWLFn5*^e;(Lk|O=M0J=vN-se=|*UEO)WxS z{9%1HMT>N>1qJ~9(7!$nVa1M5QyLfNsArOX15A9+FZEqlb#aqG%Z!> zMOTPu9+q%+KP#%(C5Mok7Sw^V6+QYLG%pP!(gJ;6TseWhCv!aE{ zgNo*RPbJ<9rr8UW{cVI2!1(^-9^ruzyuRwS3?z!>iYVK56HP~$gu^)wskdx{_A|A%$pA`GX(VptL ziaJef3YxR1@WB%yMS;zER>t>Ld1X&uNHq?y&NxrJz@8%%-Kj4_8>(KkETl=37C?6n zL9{hy0+Z6dl!(_)ov@1viBc^_RK2*ODry+cW^DpeYDX(B-v9?P(RbJd!`>*~gZHL+ zmU_s!fq0!fBq@O`DwMo>bu z`iqauAx>6iS61dfGmQpPiEmW?adIb9c5yQ+uwJNvu)32Ph9xCfP-U`^PAZr2UxGw zPw7Mx!Bsk~WCRW91~eIdf(oT#mf98ar2i0 zpcifan!6*DM+=Eip!3c%*ZuB_qpw|l5gWSD?4BQ9dpP2t>)ltuG{6SGqS}i|Xm+j7 zDT&KU-RJIvw0~{y2wg)F14^MmzT0*3CA0d#^y74bAL>(f&ejk5uGdHG<`8x*9C*Dm zEeawvugF04+$D}Klp0zSLHErYIKsTsUQ#AZieiGLH-s7^P)E{-9mr>vqATNLxux8A z0+TdFqP~+c7EDMR@(`HDp@-bUTZg1qj3vxG2UDPDn^wS4W0N8eHBFLbR(Oleeheql z2Qd8Azh*4uRFFQvV{sx`vzk#wJ`V6#(3fJUnDM{`>Ryk*(7x z++3JEcq_fZJ8(u~(hnsvUE#P)M5h9Fh*M;bj3JUmt-O{P;|XmaacDQt`6uZ~Qc4Y| zU0E7U+oTjzVz9iSLswEpp%5SwM@afT=kVn&3?FZ#jOQM%tjd5F-HWoJ;sO!D8Wmb` zi?uc;;zw7n-^x4?Gex>_n%v=vCZYXQ8OgUeNyy3>A808nNe&_^n4laJ3%FtlDKf`_ z>61r4pf0|5sFL5th-}5^QI*R^bBX+s%RI@8T&UI5NDJiVYY8a1w#B-x8>z(k!`K=h zUebL1hh`Xt<@<@RGiw)4%*RHZSz8t{7#T^82@qmxogxc_<*tVJdA1@Za{r#gc*8KS zez}s~Sppj`%pK^ceXh&RfWj-JX%{Y0w@+(DRI2msAwOPsFFNog72pY2`v*PTUzww0 zi^YgAs+XBHa6~9)FfFFoTp0|7SX2T_@}G?w)d4OVgppPYJ2XxKd&S>9@HTyiMI(|7 z0+KuLpBGHiqtz0nH<$ERt|?)b9mn0lYu&iehe$c5&Rg&}<*! z;RC?p5Sl(*5$X_vOq`BW5*TtD(@UfBa`t~qu=gcj4T2o%3W(ESNU2urSTt`;uWXo} zJeK&Jx5|CWo>=|-bIIM+$gW=sW&X#K3sW~RAw;8`-M0g$n$}#v{pb#pqTp(RlNE@r znBw6RDTN;>Q@lGXz*ise0{A*uQ{^~_hMOo};!o{_0M+0eLeV(1lb^kHBz~eP@)voS zyuNw%0*~7`6vn9+)_@!N{jO*xBzKC;5fR^_|q;dOW5X2ID5Hyq-Hn+Vq9 zX^;^Ofi1$c?Sl&AM5a3AR>?BV!O6C|xq7p*Vrvu>jNIIOR>6V`H_4ZOYyz!#l5VIE z-ebTTNIjN-qEvJvC{>bvJUxVZWs-7>8VO=<@`pZRvXD~(sLuf#8w}%^-!OWN=YeIs zyTmK24xCE6fVVJ^K-ijUmKK+w4DwQF+N7eLR)zXeKYkEF90CI}VGKNL&@o`C8_mx{ zDPyLWJ>$}kUF)KGuDDo0z+Rzxc)V(gdE$R_KJn{hqJ4>45t~Ax0*6sO_(3rz((#8$ z$BnUt66UH(a^|;CjqU^a1;>dlcIb4N;ySpwMy?QIv}f6l;>7c2g_F~rixOEI<0=cA z7(NL`2-tvaWM}3MJQ?uL#&Ks(V$>0misGMPk{T8n(5$yKp-=v5rtLdEYKpEMVo6*t z$qJ^6_(DAWj6BA~D^M9|@V)_2Zizj9r>e=^1l_pzeh^y zCV&*(GbNAjC3>`kRtqJRbd;=8>w-$*LW%0KGY14l0PZhY5T2oJ^L64Y)Z?L`k`1PL zH2>ZGn~!U6x97XBUku7)MG<3;h|pUW($NyYl8rPE#sR=6nfF>tIm*cI zFE0a>fjzEJP%q^w zB27lg$t^(5DjYA$fLGXZ43S&$b+ne?ir@qpEMDVHlk`w7*YxOwV_ zJqeS7V}ZUx+K&QeKE-FL1O=>dYmpxNs1v66`W&aM_}0x7Wvncx1-R$om=w5n#f7jWOs>(3!N?Mu;sY=?sa))O>aJIhLB)}7h%L!Zhpf3nHZ&xbU z{_;FBtnai*fF7Gmk441k<5J;!sKz5^WRbj=`~jl>>ClO7DmzkA+A;X&NUF3D zlS!#m-2$e~3VN*xEGYpSl{{T9)5Y`o8?HyS>-c1Trmik&GYN)8p}uVkuX%k8Yln}D$hfonX!TSeEir${dd}d)Wu8J7J{p#U>b8{Kqq3ep|$|sAV9RHD9Mi zwn6+(*K^bP;AVyo*cyIEk>L1ga60V{#x?xWy<3SmyC@SKr%^TD}%EK z645w3v41OKiSVN_vm|_~WcZ*8BrOk^*Wlw6g!T@hAHp_D{u1J#8;g&z_ z&h_;1xmpEF>@3gGPubJ&+gl}+5#RxM`&R*o|55e-a-k7(&@~-xejsSYBE@+Ybur(A-n+LRr_XGoTwlNE!PFmZFj-Gj zh7~*N=CnLP2_z2f-$F<==CI}IQk@RbhOF>b7R=_kS8D5Go`X!?13iQ#y*0ij zKOx(<5QbbyYTQa}zd0?GBvi|*pT{$MVom-;L!c*UVdk;ASs_?KXN58cBQwN2sxieR z8VeS&Y3+W}q4p#fU6|hDu2#kN!OfK&16F;IvQ?9ZLd48X&#fAG z@vR3@L*8C;q1l?FACYZ$JrnNTsl4 zGbU?mV3U)o_ybBHDrD(&!cj%Q^t9dczoC+#W(2 zC;=?CQqGJh$TS@_CCsBBeKxPj!iyR2mH(E2GLH`Myh~K_NMEase`HG^sY$;fWusMN z%q&h`%?1~ZL%P-cWO*4_dL=#VK_~0PC#~bi5!9;fH)w@zS#i)tBwK-$JSL2mx=S7^ zCjx3AP%s5oOZh$t4}oq4=$t5y#gM#O6Bv5)xf|SMfePeOy1AXibHda8^)ge9qCihE z3K-UDuv+U+zRLZ2lI97&Lu+s(3=PARePgXF93y~&9eL0MU*saKa&G*p$r`=wqaEtiV=&$)DVhpKAas4Jd&4 ze+7U9D1iSaN0OhJl520>*ObB`lPn_zOQ7E6lUvFNY&2D5M%sUd0u;WIIlrMF%T?kt zw3GNigKhlj51!rEET`HT-ga-VFRG;<#WC|_djpS|QdTNI?>xJo0jqfyodyVi#{=!{ z z>r^ggzIXTW!+s{t68`qNImF%mnN5*5@9xOk21W|osK^ig>(1Lq0D6ErHZ(7+C7jw) zP+M`n<(^QppLMNysS@%OLUd36D$q*_u+x;JNi_Upngb%FjcE*=!f_+PrpG=`&j@Rn zK~6k2`vFEWx^F5_J*QmphIU@#Ke(cP>Gd0~lkqYZ>*5*>a7W35gDv zYU^R3r=Iu3k9E7sM4I?lHqzEe`JGDu^4=I0A}dU!x7?i>YG)HR#B8y#laN`pQNxl@ zRH#Ki#h%5GS%w89a3pK3eF@SK_!nv3-SG6&@Ea}FfQuh~`zt!?y`eIQLt-Vhn@El# zAUwzNle_?5%0HkvbtvsL`Sb0d^CQ8CB^i=Y4zK}2WN`yln-M1aO&Qm{=3>phuT=Zh zb7(R*a3!W9MZ|2zrtvG=4lc8iDKWhIh*G78TYdN;At%w%e`0-ozM%;NJ#`{tka0F7 z(nw-*^(0VZ;t4R3g1)Pk-lhzH>9*1+ZiTu8|K2X7H$cy?an)!F^wRO>!-!#O)N1!w z@<9{zmNZfm79lc0RnVybya!jln>ra5BkMAu z@qksZYr{}sFq`UwcEkIPU+)pxBizvQf6*&G|6oP_3+sPV{6dOZ*Fc`^7eIeGiI`g;k4b!wbY=zJR&=&UVFIwGLSWCU1} zq**c`CZMn=a{C;^(FZe#j&d2u&?54J2?}!0sBfs+!)vuGPmqp zE+v{qoQfc1*`cl-m9ETUxf}4o#*P2tYGFYrCd6j;7gwc!xTY_G0dNHa;QF*Y{06(u zx*7_fZuLh86hF$6pvKW=k8WJ3+`uCW{PxFV3EEmcTmlAichXL;j|)AgXlnz(@t&Xa zcEGXh?n3(5kRDh?mLrpcMpj@QX;js~WTHMLs#`)~c^ zS<4P`5C4(V^~H)9#u^hWBe;_;Dux-->e@N0#<)sQ(CM)rmQq!pM+t(FZ8|EqZyNj45}MW zTok+TSv5whv4qO7925qPS;O15QaY0U;2b4-^{oPD^`nlxCrz z3K{BZ%|A`b>rDXB%n1nx4j3dQ@C|G0OVggky@D~nV{S>Nwg_qKe|+>9FN1A zkRZ?rS;f~WM{~um42V$7I%PtO*OMT2gPAyS3ZX-gm)X6@kM3;W6ZH{_KQtfBBPhz0 zF2Qh?&=vaADoa}TYnPtnkK8|}nBdV5VI_@(-+u6po;^QzeC;JOJ5O_C_&(3wxDeaj z{?!Y2^yDW8@C0oI8I#3YFtO-Z#t|AuSZIi_WhFc+copTU7H{L4Vgz+d;hcSb%Z{@G zC)y*(aji*xfbC@$dpuYa(kDyl7v!TnF(uO6+7KdoyJ5q=l=xF)g#Z&|v&xWi-o&WM+Qv z$9(TEn^B&0oE$B$!0U~kM()ikxQ;j>7VDH6#=C0{UaUqzv38>D*a13CV+H9 zV8N?`AIl4pl`H(wl9{S1i4JhPBlUo|6JR|2?~S3Xpm_%?u9X5^ol_5>)84#hdHWR# zfeDw5X^=!+_*$7p!5~aDOd%t2s2Yo&meg2-Z9hne(*CXU7J=Eqgn92l{gH$_I9S&`Zkh ziZv}Bl&K9*2aew#B?BuzMWBaA2SXmGas3%myh_?I`y4?v=vk@&PejAV6or#={;Mt=J+ja@tAZc)wTB8E`?1 z#?_FFzg~o4iE#)70rOzdOR8ao(9?~ZgR_ORC3@mq1&BMTDTbF1gpVHU9s}1Zg5BjX2cfD?M(9VcN9J zi0~wSlL&2ZWbC#~c;)Pr+qa)}?4~pzlAu;yvJM>jB0JjZ0+S1pwlQkcm@Es-Wr^t) z=uk4rE6UqaH6#=iRW{$lO85pgbqhAP#1f@k{3H^nNO3GPBf^HVr4iUEL)o4$`(wZK z9%b(M_+tIW-f?zlR(K$Fl6TQ_^ZEg>pf8>?+M2{sEu?>o^^@oQ+&JVe9W@)p9*0&Q zj~i6vu|bosgHlaT`2kinSHcO_j3_TP3jPvSJnmBs63`8XcZBQ@wZFE4LO|_nEf;Ri zc1^?$lj<GcTgf7TixDF5I``wRbnqbX#1 zd-rptyTb@cOO9a)EQ$(FN?}VC3d*z>Kx{2;YMmDtuVKFxmgUaroue3CGTMkh-~5|T zvM^`UBmG!H{K3gwPNx#o(*}>1G>?!IOwy@QzgPT9;ty~r&WWo$06J^4S-8Am380)q zVEJtY$^&ZfmUf_^cs=TUNJX4hX}GfiEq-(-JJ_#>pa?y(ApFE_Gi%ihubI7Fz4~x> z{@eq0c;i$g<^|$f_VcMWG_{odKbz0?T>M#6Kw#D3=vho&c>^>%<7bk*+1VSd_HWWP z>2ve%fGoc|^Gl|Y(iz3^-@xi||ECp3TtpnCdPI9^dr#TnOWYeFHPs;fOeU(hfm8;y zIp8P?mtN+Mai~t3=1$wrhMV~_JhrNn<)&!^@6FSJ&sa&^JQA7)zWmMrqEw`+slIuNDZ`k7TNa0u0MR7g+m8=E*Zv8Gy+n&VbG4JF06In3`5GrE z=!z54eyC=|Vby{23l~lr|Rca!rzVS##LaC!Ot)q|CFF{rVfLC0h8=1 z0}pg;>w}4NLW8VGIl=LXdROBTChp`l-vK<$>;W%4JWnrx|D99e^yuB&4c?8#woIY6 zhhP``4NWKKHl+ZnNz3EQ^SXlvE`&UFMxga6|KRH7S!+wRHfC~6xlxmrUy2bNHh@?D zm8krnqK8Q6QG#gBNLj5+m&`HL74D%|DagLePc zu(Nl>?SlLeaCdyBCHgay-7|>Yq!i53lc5{KU-+L>7lV55-i9(Igy<_L)a_JePO_v)m%U&FAA({_2-QFm^mw! zLG!kqAQjD-E#S;*qKP8^8UrllKi39z)PsK_<9q4$0)6=j^ytZyI{H98d0YeRH#6^Np8sL(eXet_<9jSYpyT!03L!Q?+#qsFbg=-Nb(;xRXG#(H zRO=5YH))spkWu%&$=aQihL(pD&!SBTt&OcA+m+^wM2-dMVrBDJ^x9Eo+L$f|8~z$g zKN?HAFsfq<9p*P9qnjeh@DBgSVn{u{i4(%p7AqHtr7PaI-YqBtf{5X<+Ur<7NMJppR2 z?5bEN0q4#Cp>(&>bNqQ%+ZU)2M(qbIzHDS%Wh`ICu=NlHXr(P_k}@`}AB=P0GA0|R z)i4+9JxX!s-X*2gpEN)(T&-3vTjB^Y1!BPR;0h5A&4-2M76AjoFZWH-ZGiAgW~NAG z{U-b6QMiGf87^`AqB(>>#as7p4m_}7{b{%u8C#_qR=J1w`dlmdcN<7FOP|zDB zNsu1-c7}BJDvjAPz#Hl`il|jSoWuhEO4qaqDn=Kv$!&~6VjHUOg?CvhLj4#RiDtc& z>AKE=mTMlkgc4QX4-hT#k=C-bYemL z#ujI0?hR^xlE*>~>&Bsn4F-SB-SkZ=EvS|e`BgJPdy(zXO8;b(aK>8@_ArU<4(vYr zW0AcYh(S@e8gUX+mGN%3RPJ`98tH~b)M5uT_pCs!t{`b9z)a3YyZA%h7!apDmZxYS zVkKsH zRc%d_1MC38l@dW{j-vx)SDdH6LLa&W&2>Uxi z*B_3tJ9Be|Xp=?xe6c9{;>X%@?E$VA3P-Qha2klkHhGVLF`&Xutrx_JlIJA z@&3yB6M9V_9rbDd%awaXzyy7kgaxrWmwXD)McaY0CV4P`dk_ERMw-fue*Wb~ycuFZ zh_sj-Ad}h8#1Cp5uSKQ$Q_->z(N4~=3PT60kTe3ygl@R!aX~_~)NtJTNL`GV0&tE) zxI1@WfZkmkxQiXt;#rZac|ZXS6*cqIK^=7?fN(W?QL{BRSbT~@9%TGQK*hc%G~P}# zsM&S2ez#TnW2UD&lI7{q5ZoWN+IE z$>lZWBdd}tONXhK@Hhz)`_3d&X$2hP5nEGGWRX&h9H8e}5xJG}U^bxIW^xk2J41_{ z{)m@qR}4+sB^Y&yLBX=&wLb!LBzcHqIU*I6|B6=TuG#`C{Z=}$3`=ckyZhY1nqWW9 z^kheE?!3vD-X;a1pe8!q#Oe1yeeUu~DempdM)LgQk`F?Anu%%&LR+*1sV=gwS0?!nF6-s2Ck?uOJG%TAbjdCw z02T^(^54S7`Yi03znLyvu@{IMc+Y(M3`lb!^O9=%GAtCKxFvowhqKj%c>oYaK!ab$ zmwqq)62u`r3no$stc-8*lI>_a)z&>a#b#>=qJ$Zo3{7BN!r&+)icw;YMGhN}!MW?I z2Vz|O*vQtaKm9Z6{9J|`Q!64=JjpY%gXyX&}nd<;Zz zl|<0%u$pHs4R>{z=ems#1@{8;oUP$kJ&v*$yELtF;43`XurG(XN&7Ns%84{F$#9Xx z`Sspk(=l2u#jPVH$u&%UU{#qxQz)76{n~a>K7S8aoEQOc;BeAfcj~YoLp4U*7tzjp z;+sXTr+}HznNFsBuP*I?y}DL;ek%Gbj5@eGK-`sv+6LlG`gHahf@2o`{x^SS@3?`PNZJGnUs1pkV<;1;$5r)uv-2 znOI)tFnO_p%^_5sAusFRLGj6jn7oJ%A`5?KmorON?8`u}7yR9JcB-2wowL3_E#a`pn+RCkAXfc~h-S%1{7XBToPsAeHQ#k97gbd*Ws(SC=s>TLXb@JbY z*RU;w?Dn0(3fO=-2C%R>*?fN0j$)wr974L*Au|JUvoY80F2TLX-+}mH1$3mCu~Jv* z?#)Ziu4A<9^X|k8KM*SA_Xz5Jk7>!sINqU~r!_<*c6nxf_*~pP1222#zUE-m2u__nxUTnW+kUk7 zTy%uIXFt&N{&=YBY+BU&4(a5EnH^Kj4>b`fC)uNhM+ZYLGrO?MAeJv>lJ(~c8#yIU zkH{}R2qyGDyr6fze|SJy{$=lMCW77eq;JH zZBz+dWTYEc#N`Lna>jqnwF zR1m;CT%aTWtLhWVJRFq1M5BHn8xl)P+z6)nKGeCZ6$({;@|47pmj}eWlekoxwSxXu z_P6#-3I?($30RxLYrx2Fl%MPaL|2A(l5ALYDu1x>c*kn0E&3&I8)~loRdN|p9E?hn z@%zOB1kE^=9jTHC=Pt1V>jUO5vxO~GCCEmRV=O98wN*5mC~ce~;g)T&=>v%$^I@qm zf!3rp;3E3~oDbw8*M2s8MmzdF>x>S->n=ps!$EjLq?#ufwb3eWaK0$&(>MroIsPDg zgh8>(@MMOp;mh%cl+SCe_7Yc>0LxBjRfKk1(~B|(qZIpvvY3;GV#dDS1`+d5V^GfKeIAu1N4GFRhFIN8aM#-05nSqf69T>K*K^C^nS708|`S;JPiLMsc3lpJV*#3%8K!IHKqhRs)lG%gHTD ze~XPve7Ry^azaZ`FXpc?s$`_LOZ&AT^#z?VMB;b0yD8oM5GihR2B$T3$>fGiA}?Hu zjS`WJ`O*$kWyXVe8C7P|7Ehfq5H68gQ zg1vC$sRJ;ZY5sd+asL@dt+IZeSpS>BYSI}Og>T3x@M#$l`sUc{oZ~%TYQTdcvGzp9Sf;O8fABb0Mgu6@$9e=teZMt4waX8(Hx8L1q zI|K=t?q$EXC%tS^XLTg@wlIB!RE(hx26Ft#{6lage?HeLK{q?i^Lx@OE>*G)F2)Mr zs&R&qWt|O;$}*2Ntax$a$sV1cvq5+JX9_FmXH&q1IxyPb9Yn#^v5yHmv|4xC16z_0 z=DHm-mEy=S7AL0=B}ADVyV@R1)tpyvXrZH}rIzE4jWi+6bb&~o*RmFB#D|VM=XI_x za1}l?FC>Usk5-9rUfrL;NR}Z&dH{T8F>HlyAS63Z#|x(r4oyl(Yc>-a+2CFXa~FC{ zdv1REu@-cGRCwiM&zt=%TSM>P?0)kw;|T*(@OFMc_ewQ!WNBh%Cn79gPp57~}wQ0FOOjt5Z;}J5{c#y-PoxZt8 z-=wb<82vs{^Q-#(6fgWBU9i&{VJnADz1gjZ@CL)BpyV1;3>%O8>)WfUW;dbf)gv@_ zdP0bGpw! zaq$C%smhfWG;_$BTVVd2gEhj}v88{`d$V4hF%il_%h8*>KVwio&Zkt_;()59q|N~{ zwEm4*4qS!w!?qG;F_TAyHNU$=_30^5GN-MlMz)R3h~!=a`bSGTBnC0E1K^bKFnUBL zSl&qCdRyl>w7?c|vt*QwNm?If`XX?w~iW93O zsHqHjTSsb0Fru|FBiJzgeWe(E@wMo9f2!(d3~C(^gDUF6TJb*~CoZDE69hKaGv5N! zT=k@SzlnpFyz#q$xLsX7PNX9+_9m#~CWa7FM_h5ZQBtmROBSl{sB*P6q<;GvuoEbm zi-1EaMGNQ2c^p-+xR|h8@T=gYT`Dv^!LSY?nq*ID9EJ&)8Zlmr9CR>H{pUzpI01p9 z@rZjFu60BMvN~zUMjE-2$=L6S35<>cG&%Eaa17FB$qCF=BXAW6BVcTk1z3Tom9QPE z)Ii)=rpTyRS)?T^7w9!-#Borxa)(-^z?wsm{ps5I-f$A|Us2P8JX}TehHTWPu-#x~ z9Ci*)ei7=sPqtlptw9BM)va-CxA?nj8m>Eyey|H{yDgesLGEA!nI)A>22v-*O=Z!C zu${olpjBnYSQ?y#%d@-2oyS^X3b!}AeG*zS!#^wMh4g)^9;@K%QL0T%7D2lD@DCJFyNH~9ZFlb@d^>uZ!s zIuF!{dTscO+*YwH9Z<1ZJ(*eqVfpF#x`}i`$oOyrJ^)|81mNodU#98>72gPqZiLYr z@58yLf_INA4#0ou)GvR!(4(B2sKEA zELqtIzk3~0>%0`P0yWVtN=&5GP*0TN3I`t#!J{P8r*<=AnH?`G_^PJ=>eh!@TEw0) z>pg9@<`5vNg?s(^C1;(+=CCJVM#jzBRlLdTVXu#plxsCbFZGUt)0x6LukijAr+N>~ ztL)4L=w9B=jrF0SDb0Wa%v8tBxii%kR&iz22rgUyR6R*gYX2~j3=_U6F7VDC70K#2 z*Uv?-xVD~k&@fgGA~`yLrtf|!_2=u|*Xy149YaM@p_-8*HRvhgHZ;j!m0v^y z30<)SF!iJ99SWFYjf!a6s+tO{on4Cl7_o6VMvYl@J>QxKug`q_s?iD-9CoJY*nb}s zbcUJMpS`RtIh`Aw{|HiPWV#1#NHh-*sa5w1y!L=d4Te^JUyB-ep`wy9UC-Rx8}GNP zlKO|ovxxrWzS4-M8^oO5&f(UtLLGUqN2p*FD0*Rdr+9|=9q-V!7(FDwu!ZLR?a=lW zA1UM>C`^1x-F76E!P-_ap-RK;3z+|;8c%Pzg^Su`7N!)Fx-$L*v+e3j)asE#b=^oJ z-$S#nDk0af{JO_6+$patU+OI%i0w*sMFS$>_~>Y_@o6o(_Ls*xs72Lo@1B$*huLN+ zF;FkHvti~g&@{5Z@cYV3-ZMuBxYAvNovDDEq9z^q43kJ!R0q zT<%p4i+h6?i|}Yc?Pknl`8^{e+<(Wix7%s$acq3u1Jb6v9fUenJd|Yh`U>TO<9Y@9 z=D*)T2Ni6qFB@)Q1r`y1nihCj>Eb0wD*u{A=+R2>`Gr}-{auB`OJ(u_q_=W#0eE&O z{`aJO12l2}f^9;pih(+Ii>;&N2J%?U&K;>3tBvQt4ALXcGT3a<*30L!*9hFew*GF! zIzWdhFlc4QLS3P|7cbG?lddv7-H&LL-@Xq{k{W8sf`>6M@&k|}Rk1C#xIkWCmmrP& ziUh~K4kG4IIQXBSb}(bzxPi>zHb?#|MZL^=PUjL6VN~i{_#veY-fmk& zV(@8Ai2Y#TDOyx^L8&F@_*3xdgpwkwNd^O7bp;L7ze*b`RfHxx;rzn zv)8iM2GO0WYnhyU-XCp#NDL5`aH39Tu6RI#P&kjcGD5{c%$^>&mY2P3_(?%BbhOOy(|AhCVG^sIN1ONWvmvC(&hp2VA)Kt=(^P zl!F+@3FJ~+(K>9Mz#5B&RG#AT+b9{V325r}Xy1)n9~znaCHbi!e+8>=Q`wkuHb?#x zTj4BY5|L)xB%1j z1=dk|cz*qJ{d!tlB_>`e1a+`NTEN!h51>^v0I+;L{)_Aerk>wNvAnr^jMDQ@*Jrl= zwWOab3FvJ#HA-;Nc;@X=k>|+W*^?$TH9{I_79|EwW~y24>Qb|RbUB{EmJ720;sfA~ zf2)T8=H>fWTCa-MOO+i?YL8xuF~4ySqZ=f z;n92`IgRZDAPzCePR*C~x-cUoMSqrks|USo5D+D*JtStD&kc$LWH}(7CM43!Ul1N9@wW6Gc@?-HNLJ~xbWUR?C_&F$*t09=I|l__U#VP z4ozCq>Y(N~a4l$;jX3$}#pjnr6TmOUFl5|m657D}$G8?GY)@RCqy0Lay3jqqHcpkd zW6Vs)+i{7KdvPM2SBkMzV>jYS;?_=hdVA}=^`WrkL4;gNtQuVen?!<)05Ju{Og@pw zH98}{QYNJ&T3!h*Lc8t7+(oH;LhGXvw$#WgLsoVHDzTk`o@2~#pc)}yg1)@W&zB&D z=XXljA-!uI=c!hb*pZ=JmT~rx=jho?H^TvxKVulyJV^XS zVzUN;mftQC8(NII9$(3S2EI9c9Z)bnTDE>q&KNvEzEjvP-d)NeE1GFeG`+~^C(Hcj zp>s^!o0*y8`A1=Auy*!^N1`!%=^=qWtM%3y;c?RhpQ;LXTp`2DCVPfIs0dyy6K_X5 zH&@q2V%Il~xXiRI^@7MPd*O=QWO4%9vAN8I74plD8~|f%D+wTj&PcTk4xqB@UJ(l% zB;BLs0+_QVnk4$ud4d-@7Bh{@)$Vf7VJ60xJLi zzgA*|(^`JybFD-HK4n+xKS&(uzlo`Tk+^>gAAO1UW0QmP@G&)~W+)Hzr}1fJGKFHm zEVd;P=!q($h2=P%rKS{}Mr`MU3;L8jn0eEvGXz->=akHWqnY0FHC zs{VyPo#m+i7Cyja_iy1N{8lKreMM5SgX278O!MsoKyjaeSi+3rL_KMaRX)JK0D%RH z10+_)+HQV#aXg;^(!hX=h~DB0OK6jDmc&YIm{f658AmY6*Z|?wU2`*)u&dXekNL*M z9rx{-6*mF2+77L8F%Je%fdaGK{z2+0z#k`M`a{#T$YAa%PdvPsGl+2O+n^QiQD`G_ zP>rn!S^vqlGY{Ya#`a!u5ay8Y()qDcCe5xBOavg(UuYW7>jeC*tu%4O9Qq`=n z$NF>97Z%$38clN=jp5_B>SW6{dC;B}nf7f1RdI#{e}MdixGVNmjDVt+JCjgVzo&7{ zr0$Z-M39OwkljT-mZEXv+ZV&?dr?~HF={Od$BHe}<3B2JFuP&NnC)6*YSfZz_5>m|^B-Lg6=4(NNw`XuTn@G0Gr ztDUEE%yF zSMXapZE2qW2||(*S&UcR0s^rLX&w4wjYc2JxJ>B;%sc=hs=JSvBAo*(0M$)y?C~Qg zPbCC!pSbo1s3+n043Gv2t<>ZThiO&4`3G?td80~oI2J8_0@-SN9q*7Gi?`yD+GHUu zJX4~Zq`R2vg8rI`niclI$pGy0CdN>HZzrFK^U!d;E7}@8h?OE33w+rRB>1U(F|iUe zs0P5;zjyvXk2(IXU$PHV#u`qm08;0zD<6=Sa8VX;Bd`t1$hHn>H(aCRgE-}Vt;_!0 zl07mg*S5KUUz!z3_z|5@ht=d_bi0t=Hz)E@#X39Ut}f)Cc-Ttnfb$gZf3pNM8nCP- zs~Sdq>QG5~io^MkIzS$=wUlXK-CHWb^I>tn6<7ULz_G&BlbunZ6|i2rbRyRI?F_*@ zM2aqhhq`21U=*@^oc-Rj(w(hn6of61Z^SQ2$hY=@xNc_7%vR`}p>*=%5EU~bI8nDs zV3MZ&OMUmX<$qj`VL)zkKOR1AsR%;-!rhgr<@fosh#wEg z@ZRg8CRI(5>6PAbfICx|mkOXhk!XyPJSH_iDAr2$Z4Iu_O-`UNQDEGCuUJob!cL7> zA#l7z=_)byu8ZH~kYummA}U+#U=fQV>4I$%W(rC|MQIWEZcH84?-rrWbwxa6q1Fc> zkCIS>eyG^&n|UAf^Ii@nc3)_xJm#?dey2wP#$`Cza=7kBk$DiBE>DU*PH94vhsaa? z`PsyQu^Vsm(vhKo`3QpSr#qEN5JdEa<8~QLInN7%S1of$eTI!K)9FK$QPm{19JO-F zJD4{$^%_1XvYSwuh+hAS_ztw1IJ`eJca_`}R3ck#L{t+yR6z@DO`wgHJAD>0ry$c& z*Jd_)9m|Q#YUZ;{HiL9R!djkTr+le54EF zJTQ?BfYQlMJcYQzV29~w#>DMX)dUzYP~NQTp;BUef*4|otwlgk^W)N4W66aq zev_92lI!e%MN&MNTNPAC(dD#dnx%)rWac7-kl%r0E;t?>&LeW@d_eyiFlgEU28^}; zVZg{-_y06tEN_ASHDEkE_;Y+3Fq+Xn4H!%Z(Vqs4vcLF_&jWdH(#ado-aiJ6HMQFh21V`iR|(UEbW- zfW@!whyYdfDpk6h01|S7q@MYry#2v?EWDzi!=? z1gX6bA%pUZI)qs-2O04WE1d1Q?^nfkNZ^ zrZCcUKXKbB*~uZYVf50{x|5Uc6aSy`=Lr_yX#JXWMoAZH+4%*R5V~j~q2^^!kO2ma zPY@^rjEkQLV8GxGI)*lrQ2>@sb3+oq6B!8Gz{!=}(?>#DlJAeWsJBF9{D?ovP`;P)cZRb;gu~?2KR6Jx+U%Q&_(9B^$ zdKW(rE}lp^46U+V8F-kRnajA`Qrnmpc~Uxt)~l~_!k*ZeNq;e+MJh;9Npz%++fRjd z-!2LufX7N5Z$HZ2VcL&gEE0v;TRj7-a?2Jbw74v~!oNH$M@6z!DStGeFagjWSb>yb z^x@q;))P$HLoq*wFbj2lA(Bew2|p~icR7a$cnMB>fYoeNSU+aZ5Q9_a_k|KN-w0W>zo zb)1|&`YjHcjw#$z9SPa$Tl57`nLuI?U1r1pU^kK>??^}{B|{5yq{zw6^Ul*A=| zN!0tQ=&yxp&CHP7))kfc;Cii09Nnw6x?(V~w2P(hEJb=bVyF4n2s(a)zuD8Ky0zQ< z%t+-fH8f`3`}TuEW4@k%#Zar692LDwdZ_7j;3Q|E6S%$Noz;@0E~suq#YI;lq)M&& zlA^(y8yZ{EFoTq(&R=zY=*ucH)Fy^$=1^?`AqAFX(7~OshoM21Y$KC@JyC?E8^NI2 z(V{?lLLJTYCUR4h^(~gWrSXQjGsM^MO(q9kj*JCL)CvW1Gs|YH-(Q+4%~=`=35oaD zEoD0xD*aPxERuL?kZ0d|!#HhzV+aq%mis8o$i_@djRPhwnE$q+WS_V0$6qf?25-?Ii#1{rxUJs19r|kgkgw(za#EYC5zR zxQv&f{{3klDSO@CPXDgZe|*?J47_MmlCTKAw%Bl{ju*M9PDEDsfq z$bI96iDpA@y9rOLGkrn4HvGtno=7c=%-0=~)+xn5YZYJEy$icr(*>aE5yj~2S$=H^ zD5$M9rh9Hy8N?U_mV|j$%$V)i?p9D}{|uf_v)$WPq$eM=O6u&~F{jB+BIMjyT%lb2 zy2&6sBjt%eX5~4cdt3gq;y#PbwVqkEh9`_Pd~o#u!T_CYG?{sG!<9V7rgsh(Ymo8K{AWdUEgJg)ZL&(OYTo zkd#Bx3h{Uu?xp#En0pz#exy#;De z*Mt!EaZ*~cibE;3m^mE5B`O6Oy9SG=LPGf{MzqsrxZLMXr#Xa&S~gVPMnko^@5C;UHRsW??6Y~iuzKfetQ8IspWJ+j{BZcK zuY}5fG9;zH%MM9t451&mo_wF4a$LX75-M?v1eTW0Qhyd#qBDetkRl=*m5a7wFsc6g zXhJwPX4S+2J+x_+6k)PjIGO*}o6ud2;29=#gjup5O`^z!3v$6hhpa^l!?=@JV`Ih8EH z6Oq5(AYN6gj$qal(gsn+X&l^|?;942BhvUMvrSeudsA842_c4GdF$)dy)?aK+!L?M zC-N;`TWu<88$0q?&D!Y=VDIL>=~+|52G~VttG^ZuUEg5=9x1**UpaB_WA0hO7um9m z_BBw8xV$-3QqpBN3SGNd)z+8siPW(T<{`_`vLi`NLTJ;pi@#o5uyeL^w`1iwgPR5g zBu3grM?*3WrRjtB#fRygi=<`d64Qb(a&Yi*addynIB4&dd6i#LDh;;>8NbldY*Td6 z#ZWtqCs<>0RE4%ui-}LAZ;%7LV%)H;lB7_Y-oNWQ@%kd_Y|UqcUJLj!60a=zM8NxR z=uc4PVYgg3QHeu0>)D*7i>hSK8XT;sr8aO((_tO3H2hRu6S=p>G|o`lB#X)|EF*VK zDh^*fJjj}ucPK5q0R}OJatk`{Jv7%YN-MWc!VBGHZC4K3u3G3+y#|L7dk>1+-8R#z zP^y&3D@5IhV1laaQf^hcj#LGuQKsU~HsGBM<>)`rQ7N@% z79?#%boM77nMvKp`*UUm{y>RN=b=`-W(7l|&}oaN>mbl*RPkeEGu=|KcHRyl?VbRQ zg8l$0hu2k#2NOM(=&U!=`W#TAWMUKyeVGTLxN)8|WqHZKHVeo~2q}_4zv7!ndG%GSKW}lweS*F9(XlIF8<*Y*ZzG`|jiAdgbkN>dDoMeeZx^oL>J7l5NzB2GP^nx zt~1xXY=MnLQfY>q_*DURQ#335ce7+LfRgkh73>hBN_pnAcJR;#Shl-y!#y%rePfls z3y+y*ZCL5essl?Msn?-fFU{5%7c7@jRHa;%P}QvBfv)fF`dq27o6pV?sutEYHl=|J zo2kG#T5IMf*+`OnUGm$5j+&$+UA+1tpWVA6D?LQ1X}U`dowFuml|XSR40t(?F7-{S zp7>oBgtlT@W8@-4CWu}(<0%~za4y(^Ief)O-KhCiAm~}%g%Z9P^wLBB45oZO-UF!g zfPZgY%FkX|S{AS?k4MGsVtpcy>~d&ZFloOP%1^?a;G6$^`PV#S9`R%=7u62z%stUt zHrH2Ft?;66Ue|vYKC$6_JY6y{_9lvV?rVvyT1#im5fGTCKT8A@21`F7tLdgeww=O? z4cjKDbao(1`;qEh+K}x)iVY9r2nxHCKL*rP8Fi!S+qt2{tvsDaho~mXK`hL)MRT2f zTeAJJe1WqLDynaX;1c~qbngl04$-zRin;dDg85W8KqZ;WSVbQ#n+6v{T=?vY!pvd~ zCt)R2Nr`Ui6|c(fE3L8#EAY(#=I0co>3A?;E0VK%?|!foRHX1=4+m#(V{Tlp@tRj( z|7ZBj*9f-`NQ&A*Wd%!sy__%vux)GvTN0RWNiQY&BY}u#WIM}+>st@)q^K2?WBU|I zbcm5J^)JB;K5kWEx(%iL7M=y&6;M&WpT+FwphjRJ{$t`TMZ#3kyzBR}o@`yKIXDAS z<|L<)9%MK?3UHUrJy_UDvmG565S{bnd#OO-8_bY8(|136t)@=;)6M+1k zor8=cBdr-|1p+xu8f=Y&QejC;&ro6v3<)H_V&D&p63R2GW4YXuabVt?!ibadF$G-> z=`@{X7u&p)0@a!Z>d;)IHY_bT2-C#}_f^Y_4~3QTApNMfkGwq$7_rkHRZ5lbV08$i zsnDF}&GKR4Dgw~?Kyh62Ye_5VPyv(nBn<>;j9!e+%@i-nKbTi89rjczZOQsS)QlPy zceEodu<2HV2B+xXp)GrjzW(1$!G9K?Qhi>;|2xJk7Y~RrBO#TQ?&Tx=T}pOJU}bet z?Xt*liTGCVcPH7>(7gFhIKm#pLbjs`w<+!W42Co~RZ+pg|T(0W89{Ch$|- zpIRa^(=7FLaOWn*?T}7?%L--#!Sen212zT>z_T-QLyfJ4N?Vn++5Q@FzUNkO@0_c> zJOrEfWo1r&lUcRbI15lIaoxSu>TK__#d~_eIiAr8z?8gvQQeK`V-d?sL$ug78E$9; z8Z=jG^M;D2L@rP~i$k*5&))pGuj4Bo1T~zCmj-Wp;Nu7w=?jgQ1VeVa(|=%j6cQT2 z1RI>27t>Kb@m#L=(oy!){CAF-6d85D_z{C}1X&ViReg1m#dC+~?BF2FWtBj8O4RTN zID<7q^nRZ=f1*kCq;0fNT!dz-tRFj}G7VIwMbtpy(<3svUM5Ue&$ zGL%Hx3f_V#34KWBCX9kS7Ds=>s0+&{aWL*gbhwrpuvQCH+R>T8*v{-@g;DMQ6|ud1 z$12w~4ZagpG=S7UE!n3V{SCX>8E46bXZ1L-b=>JU5VSgUJXG@1qT29Dq45Ej9i@_# zrD3r`GEvlsp2kmTIRUw4QcJak0j!SJdUr7d*X~4gvl$fq4I8 zzWP2_4cHNX?R(}4@LnB0|EzJn5QJ6w^Ofd6E7ir?q6&&$CVt?r(Ru9C=q!7w-V&3c z1G323!L^M7Fgkm$LaKxQtY>hP6j$qA9#IsLlfCM-4Qh4j>)>;v9hgCz&S**67HKb4$zrnaun3O&gDf8sN4ye?dT*aXF7qM?jZr zoP0&A*89R5=fMj4X>?9|nOITc-ibu|Rk)-xKG%zo5}6r5QhiT+x5)9tlrJFEF!{Cq z&ILApa7P1fEI^z8K%#pSq3kAoHnBXFPr9t*;H^+6V<6>J)DEtPD~tpK_ux%!YiB2EZ1COJ+jHBB5Pt2U0b zr3Kej)B!V%u-uQ`vw}G*9?g6WqYC5wJ z^qgd{0hAPP9jHkeG^QdJ!Zv7aK4%ZSGn~Zrk9w(g`Pif!mP++DWS3{{bdYIp-Mphh zT0FV(nsa5oJuaT^h?&%c#T^S_#JJoFAqdp+ZI0H1MG zs7#5Aoy1x7$!8$d^fg;4=EB#Rt`nW^#h55F$%nW0UIEaIz1QoP-#OD?R7nX>gek3i zlGsZ?BvdPsK4Dci!t-8_v4PuPmys?!InCf95czyUpQ-x2+s3}`O_3sn>tZW@!; z=FF$ha6o1*bEl<};sOM^4?m*SKLCA)*b z_ztcc42)R~QuHm1p=@KwA*J_7H-xKvX8)Z|%^(!D70NP}be(kOkR6u7Scqho&PlY6 z<{f3fxAaxZ`)*o)w;cDE!gzLE$G*`b$@(tv&Z!3)8 zz$uXK`bd^k2|PY~IB|C3=$NL($Hg)T_;@nDOEt>N*eAs&*+GV}#?ye3r9wy&rb1I( zqr?|Yh!xZv5-V!gV{khpeT6Xk$&7JvVB%XPuR(_cU{+0GQK{A`T zGp5T>9iVC1HTZ1^^)I|{Jq_zD){J1bTQx@6>|qus*{>G!8}+48HRwfMOf*&plm4FR z%ttT$p4(j0+-N1GmWkZ7FG9qPAwo1*f#A~^rV)jrlI7%WntUlDGN5d)G@VG=J~Oh;=8RH|>}nphL8D!!N^}F#&grqi#^U(dr)3R8`tfHN zwoBcneCSIm>x&Y-l)o#6I$0B@3b#((gO6K>q_Ll~^j}?q)eZBj$;D(3&L2)ujTMR0>fSL0mMWjQv2KJ+$+O(XV)N zY~uCmVN5bCHr1^gXNw-v~w_Du?1k9TnBCPy`(inzSTr##!~`3Ty0dwhZ+b zo4>P@ME6xw_te^0Mddm?J~vbgB(I!xSl#BC(Leq*RivJ$i_iAS7fhhQmOf{Y^Z==& zD8>SA=KOzalJ0L!{3Y6<}NzC zy|t5q(8;;apNOIdN%kMo#U_vy(D3QEFT;Qb&2bNk3sVi%tNkD-ta_V(_0OR<2257m z5do8xo^`$@yi@H*Prn}sk6bx|&z|^zU&dY;XtDf;V`AQr@0d18-SFl6X66Etq#7~$ zX;+vnTH?sF!4ejSrk1z@eT@@WpQI-}P>0;J3)FKo+2=)_Q$=p-Pxe`u*ch`as(X5v z8$=c9V5>Wl>7eF2E7W!;CC(ZBvw&D|c4OO48r!zj z*tTt>QNu=!ZQE?pyVIxVz4)B_Kde2!*)wati$jzUZNPy#m&=YIH51-5_)ugkqvG-K za{KZ)47=i~vN1M3ak9-gVAIRIm=iCxUa zQY?&h?UXerPYTvmn>Y--)bF-ns+HstnT*~QfMGQHYqJQQ^2Atw_n~$G8iiee$nLd* zKNU&)qT9fWuJ6JPJeEbUOJ}*kmr+(l~@g%)$U7Q5S7)oOvcw*-rd z1TzG_Ofgzr{Cl)z%4KqiP~^~H5pKMWY)qjg`dXQDN@W0)s;Xmy z&5q#H>pumOb``WF; zPnz~`*C<29Q-4SA4~x#-s*Hw-AlNYvsYhE*ZDP%lc3G4qabq6YCq*1hg0uG&I7t z8O13KL_>RPtO(eE-eBXfy4$@TL?;LNPq=^k~4$`84 zA@vYIOH2IGKcB!|OD7la;-*KNOKIY_xuQ zSP-`SgyDuu>XE$pElHb{rfLh-gg&5K@uT)GOwB+`&7ZBW9de-vudu32Mgkk3?dTATUA>}BWp;SBvZCDbNBOwr#BlcN4U_{IyHqh5qO2JZ}bq-_`Wgn zg5A6PK1W!D$prn`>Wh-;2iyt}J#l@~ttYLhqrK|7LjDXlgNoG0;~tEUhNSSJ7=Ve% zVPMc00M)mHEaqz>Q}kG4ax_B{L9-6WQ0gp|{Vyg8zerwoE2tKtHkeTTlJNZC4pb+y zOCz4F`9slGCisn0y2VQk29D3yE#DQQrd2|h(qwTm3bYp6q3r3j@@qQ^E1B6-8Sj(; zl1=Fr5YTA`OZu>SFy&hX6+*^j%Y%}k^lHT8O5KHNd0TUxg2DO--W47wTUGSs+{xU- zNY(?oGu&}yVw50Uy3q;y;nh=O%Hf9)Zrf+(u^2kv0L}R+%|Gh%;0FW@1{{`Y1F^nZMNjQ@SED;x+uviC^Ze5W&_jkqN}(BTo4GXrQc zS_N#iA`$tyWp%7jqyWBsg%h~7sS`U!<%*|KfgwDg5ER^yv%xx zt!#AFo&IV)D6?kagdcl2C<$sk;B%wS)dPCX6T{)n`r{*oB>xY+qa%tw45k>V!Xh*b zDs8YqV64Xn2|EWti|Ufm;&3PE0uCko0{G3LF^T;g2y7KjeU8 zn=wBeInxU={2sGV-4$zfny=P8i_)d@&5&{JVG9JR&t6PFNip=jC8yyJ(lxzdugMXb zgo7`7YMQ#z{JTV;ARX~5$9F#mE4s}ty&1sWYm#wy{y_D}tG3e*7O`_&_7=yzQXMbe z1XoF;kW34|x1{M;SCv6fJ$d!WtKaLwZ3Jg-Qz@eCf&gguj|3Pj&Dfz2UX^lEH~QUt zoA?{L<3=E59KDfPgbDl44&r zBuBebS=C3dxFj&ZvGM% z_*mH~1AYW-(fYjn;d{1peZa~0?V%E&LGc{JBei?V=b+?*ntIkQg;`8eH`Cf=kXu>} zA6r|jid#^hIZwUMe?6HTan$=<7`=dJ6FWjnY7IN7@6PvhiL;242})h)i-HrFfF`5i zr2__4+dV*KE8N21nIKKeQLYh|c6rMBMLnzzY^>SbUR=SYG@%Je)jGq#M`ekFZ9$Wh ztUk5&?3`0rv0}20l)^8wo5cY9Dl*Yi!h?Qyq_D&X0duxYYAYZ_n zxxDsclx6)oF0l1h=a3gNbFbjA!@ndxvZJ15xE5j z&19H(726-Soy6zVhNMys?}U}Yn0;lx1#VE(vRuyUE*r|Fl!)yYmGrol*_^CGZU6WI zKCU|V7Gvuxt|d%cQho?cXjFlvnW4%LuyL#&h8iIP>{zULD?hFmJW0&`_QO$wS|j(I zwKtZaSeFLfG1P3@4>AZS4cWVL6%${f13J6YfA1yJ-w#j@_}lOf`NKeY(w{xgLCH`^ zaTYX+7weaYYW^1-(rkYMY`o%&`3$<ci|IO=$PE8<4y3S^( z9Rpd$$Rr5d?tFc}-AUy2y9aJ}YE041b|K9eIC;X9dY~v?E&vyfoe=sg9(XZgf#X*T zS+9}&f`l0i=XHEMw|jK*ML#xug5YA&QX+>TdM(g!!+CeB52jqAu*1~e-&6N+0Ml$k-XFE|R@`=hK|AW?zu7f;`?`^eJW+M9~mJ;IO zxEJ`3Tg0)lxdMjTVmwMT$=&y(&@OennZC&e4_F;CBsFF&$FU>01cQNIOX4a}TJdMB z^CrDovWnNI(daqK+x6aYCpOF2D*IsPgeLfXlRA8ymo0Npm-(h#=%t7RXqRRg28}IM zdcc4~7c^>Td#*le@<@IUMC>dM25F7sItks;E_HygX`tIid)~Ufp z5Gp-m{TCp@`|Z$emTVp*XX^H;Mu{Z;S=fk9D4kOTW%vlnzI{LJ8m#%dv~kSmcIl{f zvGJrm-qNGl40Ivc5G+qIpG)+Q77IhWQ`uY*167&F1!mWfJnp!}faX{JX`cCg^))UF ztiE_vFHu%-JV0z=zzBa9=I4c~@+$<+LI;&cxr&D@uFsvP|L_4aP*uw4XVnflngjEi zz(}tNcd{-rmY$w|zs}W6@fSuK{Unh+)3@EvgyZ;1gO)uV3ynN}30GM%mgvj%+~eBG zdjZSN-iP@KD&);Nh=S1BEzpR{Umy}$ZiVIA3Fq{M&=%L$hT%C_k)u|Y#G`fQ;q~mT z_kLot40$fJhLD=MtdBN<%5xX%yL^#eYkqnA+C?CxvxSwKNs}dvI1z6CJzA2J-k5I%a zuwn`r^I7v#eO<4}qfMQV?sO zK{Vmt;Dp)KRR-K*t;QEvl12r;2TQ?Mb-%)!XB-DrtP+C;K;%)sFlQ3aIYxM+P;yHa zQJ!m9945HQio{{2+SVbW^zHr;Vk~CTbN<@CiN=kJ=t80dEs72sHPLhO0$c4=)scE4 zi$LqmJ6XPYLKN>C+^Il@SdCoa+>oOEGXp}`pDG}kq*{NEL=$Tu*~>v91W~)ZRAaow zr5KGE3z3M!i7b$v0{&s4#nRGREw`KXne)&?e_3Px=+ZkUhEPD0Xe|EMQ>exRmf_G5mI@A?3d58Rpj*3a_fZewljBXKgCiyFI+Ok6D)Ovj2Op1v39n z_AKwR|8H@SIjZKyb81WqC(@{cl1fm;#wA%2@IzEZ#>Rm%dy^| zS!Y{fyhW-NR-zoTx)G|`{!`>k4(H^ zl0*#5(KdcSu7hFE+s71KWIXn^oqPiFVnmtv4W z$RUhQCW7*XT!8D|*v!zc*cktymBp!IKr%5PD5%LE z$P3Qx{M`Um%id;!D3{fEy0>-%c2kz&$jZk!o=vDa=rgx;sVnv8A_uQ+D`=$DI8c>I z4}6aT*>3hDH*{Q%R20Hx&$gy_mOs|x(-S~664sqIV~y`C?A(^lWwWfeGFFh8cy@F=`alXaJfYOd8zPZIf`qw{SN3 z7Yw3BjED?b$50)Md#T9VJ&3=NVEub`(gz3$#DmtJcF@U1X%*}#LUEo~a+LY6brXt^ z%{N~x!}ZAmX!O~Me&;ErD0H2C>AsuvNLW8>QZE74ymcGk?fMA8g5lz!FleQdRs+Bu znb4P2iKxjHtjbXI5qca6Z#uY^b)V=!DdbiXI}8~GW~qzF)m4WZXXVkIO8H}*bCsmY z4?&YNPGR(*0zCLxEVSU!RqNx8#9UFy+S0f$Xn0L0g2KO*?MiA$qbK)P{taYrTy$xy%xSbPoksd0 zkcbKo8Gr_QuQ@4l3_Pg(L#1H4wYiSdbODL%CMvmn?AEAS5yHPDW7eT~LBTWWK9%Rd zuEUUZZRsuF(0iCI<7?|2oUi4+=C!O_oW|_0^UO91Yui<_z__?kSCigIR z5cc~!a6zPHrvyvm!}kSS$M4;dbKdvctlT7UmG0T8GIe{DpiZUI9HSMzf?WqL|o-SO?;3`+YXE0eT6q{_QWBn^d;lM@U`J?sLQAsG2e9pJ}hEq8T1k39euq6`c`GLx(a zZ1VYf4Gpbd3fTs@J3R3T;`e@fmh<;&&RA66Bkq2w-`<<5+H!4=D}=SGjiPvwbX_Ox zB*`D5be2&rcV-n3Ccg>SjhFOazFz)yon5!c(j+1lpZ|kIp%N>f-K9*Gm=qTpCUazy zQw~Bv9=yzUpqSs+b|$p8rAlYgAp;Hy1SCy?k|cA#*mX+yWPzr&2Iw!;4WZU4IUzxj zq&9JVTRC!YV&p=SlqG159c)C=qXY@iQL6%yg+cQvlp5HZtT7;7C5?wHMT}a0ZKGvd zbkJ=C?5Q!lftN6t3;yrD1gOD3y|TamHm=$HA9|OKYD6KiFGGC zVUb2M__7^1+j^MXD)@5sQN{>3E;#5iHa?y(He96EZ>}(~ozVy_e<@g^L<3ZelI@=1 z%iBE(fG-pZ36>M)e=s3rFj&xgY3yq^i;qVyCbo4*!o&MSod0z)4u22#;dB4a)tmuu z*S;wCR?~RDfxFK^n+k!}O1988(XyX-+08E+W}BX8FquEq+FO|~(ko{HD{@5pllwk6 zZ-Oe1rk+OANhpkNd2=`YN`-m>zw|S|jAz-oG<-7cmK~T|>4M6xK3aN9hQgZoq3ITz zVqn}0eV*LuZ}67h>x}mi55yz4b5IT|1xJwD&@%vjC0D8iYV|LM}7ts z_3O1JbUAK+<*F}G^kTt)Il8E!N52XopF)R@P#eHekPWI{?VMF<<{1^P@3yi#z+fu| z$V9;@oeUg_x$~yL^mXSj)OBXbh||4AdQvGuJJ(O|h7)#}6~p?=(s6gU*WlO6hD?lc z?es&MS9RO{pd}|A+E1qL+V}{DcVge~%rDv zKD^V3BDN(rs7q<=wcG@B1&DSNQr8`*^BPIejs-vlb*zFZ>FN@|Dmy~0O!+@6+9z7~ zTf3EDc0;9^O}6mw>#@#Y#`3oHc#V1=kB}G(nr`!gmtBjFL^s?kQQHnsz2bi*Z;nnI z{T#CZL$W_Vaa04=w8IhAUK;*{3M0xLs?aFgx9SvNm1f@&BrpQ)SYZwF{WJI=3vCLP z%SsQmyvS5~Be%MOAe?0>Ycgem$V{H}kc`~ZfVC5w$Yk8+*h=CbibYJ>3sEW05La^# zy+=L>-35=8iDh6i>g4X|Ysb{Ns$>@e<8z#Xr0Mp9gbtr_}$Sen%^KMNd|YOH(#gYW;c9p?rv8($12FR!mc zcl`MB-SIZ-Z*WiMBdwlVThH8g-KiCrs6M?e zZ^@E{slb1Q1%@&hAHA_;pqBgx$nSw)NtL4k zJ|MFo`c93lcOqsAWhl#!XG>n5^Q5K8Z0Ofa#+I4gmsU*s6PO$mI3)Tl=JV1^I}#XH z%bHl%9&MAJ4yC~ilVqvd`36ZN5mH-QLmWSuNG2R+{yG<#k3+W~_t0L}5%3tqNo(~1 zXayG7pSLq=AnxO#i*bKs@6V?$P~yX{;MYqw28KzVK9`-q%Mi6?xhCHCq)lJX^it&> zMxSQpAgM#Fc484EZeib^AepzDgw+JzY`pJnSZ8nXGnzysO(I{NRlMd@n>JLE3+0eI zp_e)PHYU1);BI^oIe$*M%9kpO1@Pq<;`flgpEmo2F7BM#{1-P3CDkF4PrKd6 z2c*~abLPNrZ2bO&ijY^@B56<^dKUan`T1F29oth!^!ffnJM}D%PPD8-_lnS2X%jl# z2PJuzCUhp~)mqF3Eq@J!26HtF*>)y42%j*Eg{m);K7!%zfl8Qnp5Hf)V7Xoku>Vk8 z!Uq-v)(}2>{)hPyC-^nDYv2W|Xl4?m3HfwyWXx}mvk)C4U|@`dtsGkVuE7N}aCAMU zykul~_{g5WIR(-sjKqt7!*(edgquXk^lGcVOuSffKSjm+Ass~aoVhZG_2;_))D)xg zZ4iqZ!jSyh;frUjQ^^J~#`-`I;4Z`KcJH^9AX@Au+|E2+rXO(pces5i-Kxn$En@3x z|+*Tkot{Kjw6gQ2Y!XgMr>ep zfz1?RNH=o_1=QOWU1Kt(o(=>!uJ1@U$lQ@B*K50o(hr<#N21W+mXNLo+VW+3m5e5> zBhr-z(N(;jS1A;}a%J0L;wI`$|CR3)12SCTO&2nH;Z6Ff-NL(y4Vr?;me|E>A1B@_ zQs8sQ7%hb?!}`NhgrvkOjfqF#V(*x|wL@?&Qs)t&*rK#4E7+q+vQ(VJ96*|`+Kcw8 z=jZ2}6e7*G*oDE%vY#Nk5Hyaa^AspM4V^eH0d=;mp&V|w|G{y2dOX%h1}I`N`$aje zXWEd8q)53iD-g5LS1jRZWkgX~k0kFfVTYKmgvo`J1M;Dg0?AQ@Ea?fEt4@VakDd7{ zK&UhV`nW~4ytM`7MT_k6wI<=R?0TuOYg1aLoaKWp?(4@0(}tt}y>|gg`6p+tcRBxh zH`sV%vwY3l)3=&Zx39161q?RkdH=^?)AFs921>#v-si`6#~j9`dghu8!XlsgO3#Tl zu7~62gN(0r5LC|LY?$aKt2!DLM3D<8&IG8R@{hv51V9z)ZUdopZV0#b>_<31s0k>| z9$XL_P=bsf#%M|P5ltfij~?hi!7qlvsI!TW4sw}DK(2u_X`m0r{d2+k<{=kPc;5~M zj!(>0j>NV-?nSJ~>n?#Y!>^=zAW_vFqE3d%1w$9%FuR+OaCnyqlU-s&Qs z)IcAjzxel6ZLG@{lP{3BD4A56M63#i@5>|PV@{zWju}BP`cHpuo&u9u zir45J-)NKFS zc)J^HpvX$7;~EECzk3ihFG)#j4w~q*DT;e-$4$F>OnV9!aWiU(^4f!X(F5yhAdi6I z?x~(gQ53baAjcbo(7&Mv}H%VQsXYl{4IhIu{u3-Rr7MPHFUqCPFP(vgA4f&1c$}=B*W317o;8XpU`BC zlK1lKvAgJR`DW8~`%h4bI5nE70@UYgxn@CGFXKDbx(72OqWR`%xV6a#J2aAB;r{}H&BOlle-%0G`0RImqtK~R^=HdX-AikQe77#(%gG)A)&-yGl?Y}% zN_DT52odP_o?2*Jm}G#6LH4R=_|OeyX5Ps*W)rz&EbxYgeZ&qS1Tt_ZaO;o>M`TL@ z1<&W(QH?wB*n{%1_aw;O+>C%@oCPhoASd-dSi-9^E`kHJsC~%}c8@)0%OV&y*aVn!nz_1{+bsad`S2jf_{o}OyBmS zAtzGv=9zJ({dWlGLb;xg5%EqRq~nN}rXBJoegO{QJea<;@w8<_A5^KM;3+bND#nia znMy;Ya1)_OO#bL&VnUVLN+X#Ywi$-}=|BPM;+U}Myxcw;0-$hbPUpG-YDgDFX`CuCMiBazQZM`n;5f5h?j70I@XBl_g5`4m^TKH zIJH1tP4t!+EWO69@2YSMcnm^KSC8#kl8oof%iR;6T1q9q0aw1i~g&W?iNv`v_f zs4x38UGq%vURH2gXEz7AS}K@!JfUWsQqirq_phHHwn(SqHQES~vkron**;!lfxg_! z>}Yt-`V>j{I|BxrrV10#s&8mUMQdFns_5a-(OAH60)0k@T*8CH;x4uRW!ATTmz?oJ z@%>WB;-4US{$Cv^kX}oT#%_$P^nO)OM`mr9|Bg1f&Ybz%f$A)O0kaJP7=Qi=t&|@T z%SsjK3|?L)IC=TLe(!1(2Q=wePgm$r@Yd&T2`U z&CLTnhlq#s8(+!o1LyL)CRuwLR*uGZLH}wJ$gYiO`s=010Sq+ghqFbEdGX6Pgw65T zmqpD#DHq9mxAayZhfGvs6z%qdRsSBv6;(;1ZDQ_ctAGzG`i!{8-%Mdir~^lFAOF*$ zs_MKv4+NC6fjQ0qFq-(r=apz{@o2{oc2sgwLHU)xH~O7!g7Z!{IUc9Rb-4OU82?)L zYk6<+nUd_CZUW5_Cp|6(Xb~V``Z(}@$KH@*pAZtbroJ-MaH`H)kWG<>A`k>#i_5vH z;p2mti$|N^iYbybn23c=WL6;y>Vuf>WYx_>&;r?c%3EVArcIQ>x0kV7jaH!0b@rcO zKChv!iPgq;kuA6>>|H>0Z%oYuZE;aP7BrX$CS zDEmFUg4&7n^r@)&Y8tJRPVSHVD4mCM7ixy<{)pL4@)%79Tjun<`$gC${dgMWfrftlFNSEy{+(VkFWeY~?9Qm}!oQO1dM|?w6;A`V z#Q!5X_`hXvIotoAcsAdGA+o1HmWy`m(%mp|}K@#1sx|0HL^#ofl+mS&q7 z8nA1+JyCB~{C`Uh{c=;)=bWc=x3~xR67vHWw&^6MgUI?Z!j?d+iePwa;3-F&;UN%%=<~R+R|5VUc zc`WWYp0RSZPV3A$r!KWezFxhVG)&HMghhYmzOZ+MA6WmnM%Q-Wykn-&IFN1n!#~ZD zi8EnEV=;TyrcA$L412U%z=l~k1=#|k6M&_o{@+_gP_TbO?qZCgg zm|3nS%1lwvV$l*5T1z`2{hi8McNI4qcSx=LHM! zmwO9>Xk|i7lQT45X{vMN%4Ly2k~5DTyR2*dctXjXpA*eDUA&OnG;uD=0fFY<>&u-N z#?Qrw(yA;CR)^uXEO~y!ajE4aF7?vE4d$!=1Au53x6fET`gC2juk*BD#smy1*d6+Q zK04z)CSDIXvE0nMdDD1cHZu-ZGC?f->FNk6THkux4w;V0E>gf&2%c0)!zW730h&U= zd=%J*h=W)4YPNA}h46VIQ{KVR^c+M;b95I4r+=}5r8jl-0ui1lJ4rOQioGHhM#_@2 zNqB}CxhA(2ma!59fQo!IJW%~@NbKGgz>W=@$z1LGPW{1-lZI~0D!ZJfO5*awLl*!!YLru;j zi4g&V=V))x<-k;_QSXqN8gAvQHgCfZSO#gWB3cGUZi14-=2-AA2Ua9g86sXIWRSZY z1~Z=Er&e~bt16#28A*<)K*7J5S_!LZs^>$1kX3&g^W9n$y?9+Y7czsfO;%@$*ZJ_o zzq3q(o^Xmm)2>DU0HWs~975vGh}s($$x-RtqM{RbfM4iWqzp;I zC{+u>#L%lX=b^x%c}?r+GWYcqgDT>nbL9g4l$C>z!sU`m(_&64N6m63Y{GV4Q0@BG z+Gw5~d<)aQR2nO}{I`Wpqhxu3zQCk-8o#+@mAScN?INxuag=B)L!@Xp`r36AZbe~5 zp^$$wGqW36KAY_KK1ugC6^(XNI~$?N4)vwURl;ydr?vQd&$VYCgEid;yPCP_7nvHA zsE&|nj)}!mE_^DwQg9YxV9%Ua&8utf1TNF9FkJa9EfOOtYpw8+iKPC}!@}9cUM1c1 zsPHGRGnem2J4T#?46&y3&))%W?ThVm3ZL;%}!f7QO0NrMe|^Htd=yQ4<59_rWV32iafW=AGZX z0e`J+4;@PF`l|s~YT$d0CNX;(?ia18fajv_(A?7Ik5sHWC4TYBH-y^fD5t8&;~=-_ z6PtSHf!0b}(+BS{vu}f5)QirNaWFlKw_jvHBJ;d1Q%~w>SXWPdx?x6)vJ!AK1IpxI z$|!gt0G}<_nTaR)i`AnVSF>rETgjXScenI8niVW?%I|+fh(R!Fw@~Q2J~D<*&lGn` zzv9J)SL}nHWzcp<$hde{rmmAi@uzU%^=(FP@z2yBC%$Z%p#sjdDb%R84-TU1jT0O3 zySUW=oL}GL00uJ+Dn+ixiZttXJS3q`d>cM_EHO%*hA94o62mdQi^G=Du+T(K-|o}T zc*8e|&t|w0culK@4hxlq`5-U|7u0&${hOD#V}B?ZiAPRSb&B)qaKK$c*u=2I>V(<(nIC`CawQliQ^d?ru={6vH@b^A|h_X9`}2OLj$4 zRPhm+xbsAqX{VaKx8Qi+KX>H1ej?H%UB%Kij0wZBHLXRwF_fO$&Z*3rd@mkf0yjXj z;mnl2(U(Llw!`JQ4yCjwQ?8S+_~Cw6g}Wb$(&$3jl{gs)5TX~BBZB|x5~l+{rllv7 zLnByd$|Mh*$cT)R4}5-kCN@KmXnfS@vN;XBG*e^{{^>@Y)7Q1Zm?bubb^9|W3)e}m z*z?mGugw`2&*rgAJh zRwo%^;=p(dxcN6@+huGlF(0RJr2x^P;!4P?wQa=o60(Q_8AGIc47QTpkYm!hTU3_S zLkxrOmh>e?i7XXr1iF_~FX4}9V)D3R+rA?SU-H6P%koUHhgx(NHX@e~vjjCB5`grdvtF-X%yVxa?ARBOE?h54{Oxfgn{= zwk*3oa;r;?zu%iv*|}aD5J8m#fjKH?Ds~G%pAuvNwYZ#dQ+X+EG-}JbHx_6BHDU2; zYTnVZCGjc7;;lv?vID1)%xZ6=L27tRTR*>{m*oT|NU>_?47~&`mMR-(`#=H ztIcS+%sUetIo%90KPeobxh%U@sSL3-H#B;FTK{w<-U%|HHlinh8*xnxuv{MYvrjT? z>8%R=#bvr=`hqcCROibP@@Rf~mGSGzsGh<_(%&I<@pTA$cZYIaZ3#4jNDFk71coUf zB(E{GB&G&u(s~(#T>+b3azqO&z*Bf0PSLDKBdff60WQ$ux_kz2(o1lf6*BZXiTK=g zv5N~}=cov-LXYMSi7CuelRU8Ze?~-s(=yn0@iUlt>dFah zFoI0Z`?%Op>DY?YlNY~;rwAU1N~XB~~@RUt~(Yu($R zJP;d69d4W~lx;U$rw+#*?z(-rcX#;P%EiTdB6-Z8MWnzL#bIIVh{BH3E&JxUdPgu+8>8G4B# zenU1O!Bh%k0Xd2@gko|{SULVVvXhZIEvm3qR?o~Cx&{LmPWG5!JZ}N*@IwCy#bvH- zoTkEQY7W>|4XAt#Yo;+XfoKO0k|%xkQD(W94|)fXfN0`5X~x8OpE* zrEasNpZa_wmL?MGW#86)J=?w!$70iZ^aFnpt(S_`u)4u(jl`(;N3<{b!7h%>yiOH^ z!sDjaPc>1#ABmB}9sl<(59Ivc%O>zH*}p-&UsOlbtT=B`Ix)f%)kDJL@f1nTm5G`p zQSyvk{h*p%n>ye&V-E3EUNiAtKI<4tXEi2^2XfS|yW57a+YWB%zQVBDm4RyvC>VG| z0a>THPHoyAiUNHsIXj>ja>tmjEF1;aH6qxr{egCH(ta+$^(jiMdS*ViWQafzvRqn_ zClz!%IboGHJ|4mY6nKLkO@vDK+V!i+N$b+N?#o&a9~@U!M(kUg>n+{7A1>wQYsIg< zOJ)l5DakIl>z&Xf@T}U>Yv3Y%hRsDnMqG+w$>`k=2<1X~X8`v6k@Gz%rK!vbiO>|y zxc=Q4C8TL?1e45prm_BYVWe);e>0dFxIN7g2v=WT6r^TIIyOT-t|knKs-1oKt#dlM zS%C9%f~<4R;_3Ee;3;g9&_qT$^9vFjoG}AKM(9EP6kv>se>Ln%LY#QYLmtSFBB198 zQ)fD1Rh~Tzt+5R)2|VdNgPmEb!QWU@Gole7NbAAWMAIirp`olE2GhAI3?8_T{81Sd z>?VW8RHrE_9>mhzszND&p}?hO+n+X!bA=vcMwk>i2C|jiNlzIE1H_}I*!j9HxXv`v zww7jD>P>udp8L8Dl9esCODr`^fXMaI|JN(kTd&-|9R@>a=<&*jLQD>d_GjU>m)N0~ zV;&`eK9UQ|G^={yT1p$Q{cKxvE+8>v%st`Cvw3DJ7`>-#ma)NEzF3(vS^1m!f6?f~ z=X!03QH}=2o{C#==?aAIoM{(h+H|24gweUC&d|2~D>afQYF0LyzEQcmb&e=P0k)>J;kH9E4tR0T zL2lysF*j`il4=MVKkTpKZ+D|AXm(R)i8S1+#P11_ul=YV+JkL{7zf9dj$by~TegA> zi1hZ#+MdPG+O_6hgdUmQ{^)?{2o^y24*y;3@uIE3 z^$FrsPyoqSBU7$BAU;H+tVJL3zd`uP70mk}Ttp@an}UkEJA|4CvLpG~=i{?K>7~WF z+C%X*sVP*QbogtAdp5?_JOOj<4|BQjr*?_;X_7*Ty~x_|2WIi42v1-Ouhh10T+R&v zS5(V!8K`qUSWC`2-_PVdbpiRfvg8$47>gJKWeOkXCxnTXe!Zows@Yj8(&$^=%{a@+ z=aBzC2!{>L44Z2$aifo3*OfC(_&IVHiCMA4)WWZ@gUUkbMoYrB;FrqpG?AR1i>bvZ z#X&{fC6dE3lFP6(E^}Hfpi@SbZbo1epkq8aT(I+J2VTq*x}f>o*%wY z;So#0F2q<*ulj-1ik3~y<;c5q3-`Q#UQbG1&exar082|5vjKOVBLi@1^lEEd%REYPC&q_jCiChZv4qh(W$2SngQ^FY(Z8;)&d-*)t170tJ1$8Nms8K^ zPFyap5ONl8lR;AUo9!Y_R8X%y!zreGq#_T1@6dVLdTr)u07Pdl5>_m@>=K-Uyh0WN zp#oyQP?aCHGgG;Rr^*>EnwJI8D{^x6b(2}-llx$$9L>C5ZtHBT>gQw|ctKS6#LCEV zrSl?T`82H1 z7m>du2vVZ}Xs~371ZZh$q!|RkYHxF%%*A?K%}2zS@rIjU;LYM9yOiwT4>DIAYixae z)k&bkt0+N!VoT&ED3#@on1O#j&XhPAp zO8FNExTHD?=zN`X)dqB|j2$X(j1a_U`|s-UFb<~;ebHh9J^3sV&Uf~N7Jt!2zBaXU zv$8%0GK^z(cHl!e=qDrm*8=HYooVAGYBL#}Gop1W=6)V$m2OHymVT*<{zK3)bc>Su zwiEMXdgm%}9@pO~G6S*eS5{_q14YZwNTF-b^m0x)eVSL{#nxf6X;pwQp8Nb(h=Hp( zR~s3<0bi)qfm5Lsw5Wi~)`7O!!7B140N;{*2BUZV`NF4OrpEt4aBuNepf2(3kyzqF z%MG?1iYvnu(h+u{c_)iDn8Njqm`dCel=np1$|Rp5SXGhu^_PLiUoso>Puff zIqxcU&KHlh=4=IoPd}#KRD0suH?D*p@*Pku~nF;^F5)^B>(X5NRQm-?8jp%ig$#Dcitca+B zzrCC3Xgl$x%lp|b(TqTGMlZo$Uo@+ziAt1W@PE7r$D`h63Mdo~(yk zEX_xysq~J|Ahoaivi7)>g*xEbme9B48FQR z0OxM)!8PFRi|aKUa8QjJdpmUN@EbXiWlXXwDq`npFaRx;-6qGsZ`q`}R67UZwvf&>vgv!D{L5~vPCb$<+;w_)=j`$H_WAZck~VdKr#1ccqd_242Ivw} zs3}n{GTRJCDdC=kZJ;fdsl3t2Ii2%S4fLa?FsMtQWrd4B{sNZjP%j2vmNKn;-FzMZ#wL?WQp{+(dTf|0PFTCNM*?asCo# zl3v`h(@f<iq^I&)IgM!3c(+>weF6LBYa z>|0P}`Wm6f0w=KpRN2rRhGMwNf^X3fT|V6_FVna%HP8LId_5NzvA;c|uftk4_p#J( z4Sg72p)@tl6PF;KuB>)?#Hp}a7~t?>q7BN z@LxdO8vuw4DP$Z5dk4gMedieINCju4PC!gNcm+{3ka!m-7i+%XwjqK%OY-gO8(c@- z?=&$N?HU&Vn4B_wV{L z{xfypbOdHDM=M*4<8vaG8^?9=WsqVM|G=`;HdbV=*gEHT?vxtn-R>baHd&LyhY(i! zP*s`bW7T-J%b#;GHF2_>?8}FlI%k_jTtFL$|-dYZcSd(h(H?}a-@jP zKP0M^aBpfIs!Pfep&-bnijO~bM$EJ`SlBf(e*M9|1mqb#PF3MBY$iQu!@Dplg@&mCPiH(am+0JfsTEBOYA^p2)+Vnq=;p|_PUNVG<@3{F+_J94$F;pIU z)kSdfyy$rC6=C=k$g8_Se0_nl2T%-U0E*$yU8`Hv#`;yt2B@1$qqBFB#>4<3Rg|_M zR#fUDx^ZmmW45C_H~{Bhu~NWpw+2dZhiQw;Bbh-uF*KQC@vxkV5~K&1a;jDsHn zM4BLZ>oW9gnUQ-#{7!+s}Y&c_voAC5<) zhPR(d8uN3W2~(*$9gVcEp*|k(i?wOk^C-Z` Date: Tue, 2 Feb 2021 10:39:47 +0100 Subject: [PATCH 08/17] added method to list the known vocabulary names --- .../eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index fac55189b7..f81181e535 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -67,6 +67,10 @@ public class VocabularyGroup implements Serializable { private final Map vocs = new HashMap<>(); + public Set vocabularyNames() { + return vocs.keySet(); + } + public void addVocabulary(final String id, final String name) { vocs.put(id.toLowerCase(), new Vocabulary(id, name)); } From d62ea1490d494393a730cf10ec61d592ca21e4a5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 2 Feb 2021 10:53:19 +0100 Subject: [PATCH 09/17] cleaned up RabbitMQ stuff --- dhp-common/pom.xml | 5 - .../main/java/eu/dnetlib/message/Message.java | 76 ---------- .../eu/dnetlib/message/MessageConsumer.java | 47 ------ .../eu/dnetlib/message/MessageManager.java | 136 ------------------ .../java/eu/dnetlib/message/MessageType.java | 6 - .../java/eu/dnetlib/message/MessageTest.java | 51 ------- pom.xml | 5 - 7 files changed, 326 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/message/Message.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/message/MessageConsumer.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/message/MessageType.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/message/MessageTest.java diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 6eb2e0358d..a8607a9b3a 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -53,11 +53,6 @@ com.fasterxml.jackson.core jackson-databind - - - com.rabbitmq - amqp-client - net.sf.saxon Saxon-HE diff --git a/dhp-common/src/main/java/eu/dnetlib/message/Message.java b/dhp-common/src/main/java/eu/dnetlib/message/Message.java deleted file mode 100644 index fc1c382910..0000000000 --- a/dhp-common/src/main/java/eu/dnetlib/message/Message.java +++ /dev/null @@ -1,76 +0,0 @@ - -package eu.dnetlib.message; - -import java.io.IOException; -import java.util.Map; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -public class Message { - - private String workflowId; - - private String jobName; - - private MessageType type; - - private Map body; - - public static Message fromJson(final String json) throws IOException { - final ObjectMapper jsonMapper = new ObjectMapper(); - return jsonMapper.readValue(json, Message.class); - } - - public Message() { - } - - public Message(String workflowId, String jobName, MessageType type, Map body) { - this.workflowId = workflowId; - this.jobName = jobName; - this.type = type; - this.body = body; - } - - public String getWorkflowId() { - return workflowId; - } - - public void setWorkflowId(String workflowId) { - this.workflowId = workflowId; - } - - public String getJobName() { - return jobName; - } - - public void setJobName(String jobName) { - this.jobName = jobName; - } - - public MessageType getType() { - return type; - } - - public void setType(MessageType type) { - this.type = type; - } - - public Map getBody() { - return body; - } - - public void setBody(Map body) { - this.body = body; - } - - @Override - public String toString() { - final ObjectMapper jsonMapper = new ObjectMapper(); - try { - return jsonMapper.writeValueAsString(this); - } catch (JsonProcessingException e) { - return null; - } - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/message/MessageConsumer.java b/dhp-common/src/main/java/eu/dnetlib/message/MessageConsumer.java deleted file mode 100644 index fb3f0bd956..0000000000 --- a/dhp-common/src/main/java/eu/dnetlib/message/MessageConsumer.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.message; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.concurrent.LinkedBlockingQueue; - -import com.rabbitmq.client.AMQP; -import com.rabbitmq.client.Channel; -import com.rabbitmq.client.DefaultConsumer; -import com.rabbitmq.client.Envelope; - -public class MessageConsumer extends DefaultConsumer { - - final LinkedBlockingQueue queueMessages; - - /** - * Constructs a new instance and records its association to the passed-in channel. - * - * @param channel the channel to which this consumer is attached - * @param queueMessages - */ - public MessageConsumer(Channel channel, LinkedBlockingQueue queueMessages) { - super(channel); - this.queueMessages = queueMessages; - } - - @Override - public void handleDelivery( - String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body) - throws IOException { - final String json = new String(body, StandardCharsets.UTF_8); - Message message = Message.fromJson(json); - try { - this.queueMessages.put(message); - System.out.println("Receiving Message " + message); - } catch (InterruptedException e) { - if (message.getType() == MessageType.REPORT) - throw new RuntimeException("Error on sending message"); - else { - // TODO LOGGING EXCEPTION - } - } finally { - getChannel().basicAck(envelope.getDeliveryTag(), false); - } - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java b/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java deleted file mode 100644 index 5ca79f3cc6..0000000000 --- a/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java +++ /dev/null @@ -1,136 +0,0 @@ - -package eu.dnetlib.message; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeoutException; - -import com.rabbitmq.client.Channel; -import com.rabbitmq.client.Connection; -import com.rabbitmq.client.ConnectionFactory; - -public class MessageManager { - - private final String messageHost; - - private final String username; - - private final String password; - - private Connection connection; - - private final Map channels = new HashMap<>(); - - private boolean durable; - - private boolean autodelete; - - private final LinkedBlockingQueue queueMessages; - - public MessageManager( - String messageHost, - String username, - String password, - final LinkedBlockingQueue queueMessages) { - this.queueMessages = queueMessages; - this.messageHost = messageHost; - this.username = username; - this.password = password; - } - - public MessageManager( - String messageHost, - String username, - String password, - boolean durable, - boolean autodelete, - final LinkedBlockingQueue queueMessages) { - this.queueMessages = queueMessages; - this.messageHost = messageHost; - this.username = username; - this.password = password; - - this.durable = durable; - this.autodelete = autodelete; - } - - private Connection createConnection() throws IOException, TimeoutException { - ConnectionFactory factory = new ConnectionFactory(); - factory.setHost(this.messageHost); - factory.setUsername(this.username); - factory.setPassword(this.password); - return factory.newConnection(); - } - - private Channel createChannel( - final Connection connection, - final String queueName, - final boolean durable, - final boolean autodelete) - throws Exception { - Map args = new HashMap<>(); - args.put("x-message-ttl", 10000); - Channel channel = connection.createChannel(); - channel.queueDeclare(queueName, durable, false, this.autodelete, args); - return channel; - } - - private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete) - throws Exception { - if (channels.containsKey(queueName)) { - return channels.get(queueName); - } - - if (this.connection == null) { - this.connection = createConnection(); - } - channels.put(queueName, createChannel(this.connection, queueName, durable, autodelete)); - return channels.get(queueName); - } - - public void close() throws IOException { - channels - .values() - .forEach( - ch -> { - try { - ch.close(); - } catch (Exception e) { - // TODO LOG - } - }); - - this.connection.close(); - } - - public boolean sendMessage(final Message message, String queueName) throws Exception { - try { - Channel channel = getOrCreateChannel(queueName, this.durable, this.autodelete); - channel.basicPublish("", queueName, null, message.toString().getBytes()); - return true; - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - - public boolean sendMessage( - final Message message, String queueName, boolean durable_var, boolean autodelete_var) - throws Exception { - try { - Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var); - channel.basicPublish("", queueName, null, message.toString().getBytes()); - return true; - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - - public void startConsumingMessage( - final String queueName, final boolean durable, final boolean autodelete) throws Exception { - - Channel channel = createChannel(createConnection(), queueName, durable, autodelete); - channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages)); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/message/MessageType.java b/dhp-common/src/main/java/eu/dnetlib/message/MessageType.java deleted file mode 100644 index 72cbda2528..0000000000 --- a/dhp-common/src/main/java/eu/dnetlib/message/MessageType.java +++ /dev/null @@ -1,6 +0,0 @@ - -package eu.dnetlib.message; - -public enum MessageType { - ONGOING, REPORT -} diff --git a/dhp-common/src/test/java/eu/dnetlib/message/MessageTest.java b/dhp-common/src/test/java/eu/dnetlib/message/MessageTest.java deleted file mode 100644 index 442f7b5c25..0000000000 --- a/dhp-common/src/test/java/eu/dnetlib/message/MessageTest.java +++ /dev/null @@ -1,51 +0,0 @@ - -package eu.dnetlib.message; - -import static org.junit.jupiter.api.Assertions.*; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import org.junit.jupiter.api.Test; - -public class MessageTest { - - @Test - public void fromJsonTest() throws IOException { - Message m = new Message(); - m.setWorkflowId("wId"); - m.setType(MessageType.ONGOING); - m.setJobName("Collection"); - Map body = new HashMap<>(); - body.put("parsedItem", "300"); - body.put("ExecutionTime", "30s"); - - m.setBody(body); - System.out.println("m = " + m); - Message m1 = Message.fromJson(m.toString()); - assertEquals(m1.getWorkflowId(), m.getWorkflowId()); - assertEquals(m1.getType(), m.getType()); - assertEquals(m1.getJobName(), m.getJobName()); - - assertNotNull(m1.getBody()); - m1.getBody().keySet().forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it))); - assertEquals(m1.getJobName(), m.getJobName()); - } - - @Test - public void toStringTest() { - final String expectedJson = "{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}"; - Message m = new Message(); - m.setWorkflowId("wId"); - m.setType(MessageType.ONGOING); - m.setJobName("Collection"); - Map body = new HashMap<>(); - body.put("parsedItem", "300"); - body.put("ExecutionTime", "30s"); - - m.setBody(body); - - assertEquals(expectedJson, m.toString()); - } -} diff --git a/pom.xml b/pom.xml index 3e0626aed2..cfe1edfbd4 100644 --- a/pom.xml +++ b/pom.xml @@ -374,11 +374,6 @@ provided - - com.rabbitmq - amqp-client - 5.6.0 - com.jayway.jsonpath json-path From 0634674add8c18d393e65ef68d200ba2be3bd6da Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 2 Feb 2021 12:12:14 +0100 Subject: [PATCH 10/17] implemented transformation test --- .../GenerateDataciteDatasetSpark.scala | 2 +- .../transformation/TransformSparkJobNode.java | 15 +- .../transformation/TransformationFactory.java | 4 +- .../oozie_app/config-default.xml | 5 +- .../dhp/transformation/oozie_app/workflow.xml | 53 ++++- .../dhp/aggregation/AggregationJobTest.java | 197 ++++++++++++++++++ .../GenerateNativeStoreSparkJobTest.java | 169 --------------- .../transformation/TransformationJobTest.java | 4 + .../dhp/collection/mdStoreCleanedVersion.json | 9 + 9 files changed, 275 insertions(+), 183 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java delete mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreCleanedVersion.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala index 6837e94b21..f04f92c636 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala @@ -27,7 +27,7 @@ object GenerateDataciteDatasetSpark { val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) - + log.info(s"vocabulary size is ${vocabularies.getTerms("dnet:languages").size()}") val spark: SparkSession = SparkSession.builder().config(conf) .appName(GenerateDataciteDatasetSpark.getClass.getSimpleName) .master(master) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index b9df902a1d..193da38788 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -24,6 +24,7 @@ import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.aggregation.common.AggregationUtility; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -60,15 +61,23 @@ public class TransformSparkJobNode { final String isLookupUrl = parser.get("isLookupUrl"); log.info(String.format("isLookupUrl: %s", isLookupUrl)); + final String dateOfTransformation = parser.get("dateOfTransformation"); + log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); + + final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl); + final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService); + + log.info("Retrieved {} vocabularies", vocabularies.vocabularyNames().size()); + SparkConf conf = new SparkConf(); runWithSparkSession( conf, isSparkSessionManaged, spark -> transformRecords( - parser.getObjectMap(), isLookupService, spark, nativeMdStoreVersion.getHdfsPath(), - cleanedMdStoreVersion.getHdfsPath())); + parser.getObjectMap(), isLookupService, spark, nativeMdStoreVersion.getHdfsPath() + "/store", + cleanedMdStoreVersion.getHdfsPath() + "/store")); } public static void transformRecords(final Map args, final ISLookUpService isLookUpService, @@ -82,7 +91,7 @@ public class TransformSparkJobNode { final Encoder encoder = Encoders.bean(MetadataRecord.class); final Dataset mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder); final MapFunction XSLTTransformationFunction = TransformationFactory - .getTransformationPlugin(args, ct, isLookUpService); + .getTransformationPlugin(args, ct, isLookUpService); mdstoreInput.map(XSLTTransformationFunction, encoder).write().save(outputPath + "/store"); log.info("Transformed item " + ct.getProcessedItems().count()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index d1f8969647..45ba2981ff 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -18,7 +18,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class TransformationFactory { private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class); - public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/text()"; + public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; public static MapFunction getTransformationPlugin( final Map jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) @@ -57,7 +57,7 @@ public class TransformationFactory { private static String queryTransformationRuleFromIS(final String transformationRuleId, final ISLookUpService isLookUpService) throws Exception { final String query = String.format(TRULE_XQUERY, transformationRuleId); - log.info("asking query to IS: " + query); + System.out.println("asking query to IS: " + query); List result = isLookUpService.quickSearchProfile(query); if (result == null || result.isEmpty()) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml index e77dd09c9d..bdd48b0ab2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/config-default.xml @@ -15,8 +15,5 @@ oozie.action.sharelib.for.spark spark2 - - oozie.launcher.mapreduce.user.classpath.first - true - + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml index aff87dc79e..43b270eaf5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml @@ -18,12 +18,17 @@ transformationPlugin + XSLT_TRANSFORM The transformation Plugin dateOfTransformation The timestamp of the transformation date + + isLookupUrl + The IS lookUp service endopoint + @@ -35,22 +40,36 @@ + + + oozie.launcher.mapreduce.user.classpath.first + true + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode --actionREAD_LOCK --mdStoreID${mdStoreInputId} --mdStoreManagerURI${mdStoreManagerURI} + + + + oozie.launcher.mapreduce.user.classpath.first + true + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode --actionNEW_VERSION --mdStoreID${mdStoreOutputId} --mdStoreManagerURI${mdStoreManagerURI} + @@ -62,7 +81,7 @@ cluster Transform MetadataStore eu.dnetlib.dhp.transformation.TransformSparkJobNode - dhp-aggregations-${projectVersion}.jar + dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -72,11 +91,12 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --mdstoreInputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} - --mdstoreOutputVersion${wf:actionData('BeginRead')['mdStoreReadLockVersion']} + --mdstoreOutputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdstoreInputVersion${wf:actionData('BeginRead')['mdStoreReadLockVersion']} --dateOfTransformation${dateOfTransformation} --transformationPlugin${transformationPlugin} --transformationRuleId${transformationRuleId} + --isLookupUrl${isLookupUrl} @@ -84,6 +104,13 @@ + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode --actionREAD_UNLOCK --mdStoreManagerURI${mdStoreManagerURI} @@ -96,6 +123,12 @@ + + + oozie.launcher.mapreduce.user.classpath.first + true + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode --actionCOMMIT --namenode${nameNode} @@ -108,18 +141,30 @@ + + + oozie.launcher.mapreduce.user.classpath.first + true + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode --actionREAD_UNLOCK --mdStoreManagerURI${mdStoreManagerURI} --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} - + + + + oozie.launcher.mapreduce.user.classpath.first + true + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode --actionROLLBACK --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java new file mode 100644 index 0000000000..c9ccbc7ff4 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java @@ -0,0 +1,197 @@ + +package eu.dnetlib.dhp.aggregation; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.transformation.TransformSparkJobNode; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoder; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; +import eu.dnetlib.dhp.model.mdstore.MetadataRecord; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class AggregationJobTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static Encoder encoder; + + private static final String encoding = "XML"; + private static final String dateOfCollection = System.currentTimeMillis() + ""; + private static final String xpath = "//*[local-name()='header']/*[local-name()='identifier']"; + private static String provenance; + + private static final Logger log = LoggerFactory.getLogger(AggregationJobTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + provenance = IOUtils.toString(AggregationJobTest.class.getResourceAsStream("/eu/dnetlib/dhp/collection/provenance.json")); + workingDir = Files.createTempDirectory(AggregationJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + + conf.setAppName(AggregationJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + encoder = Encoders.bean(MetadataRecord.class); + spark = SparkSession + .builder() + .appName(AggregationJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + @Order(1) + public void testGenerateNativeStoreSparkJobRefresh() throws Exception { + + MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); + FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); + + IOUtils + .copy( + getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/sequence_file"), + new FileOutputStream(mdStoreV1.getHdfsPath() + "/sequence_file")); + + GenerateNativeStoreSparkJob + .main( + new String[]{ + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-encoding", encoding, + "-dateOfCollection", dateOfCollection, + "-provenance", provenance, + "-xpath", xpath, + "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), + "-readMdStoreVersion", "", + "-workflowId", "abc" + }); + + verify(mdStoreV1); + } + + @Test + @Order(2) + public void testGenerateNativeStoreSparkJobIncremental() throws Exception { + + MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); + FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); + + IOUtils + .copy( + getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/sequence_file"), + new FileOutputStream(mdStoreV2.getHdfsPath() + "/sequence_file")); + + MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); + + GenerateNativeStoreSparkJob + .main( + new String[]{ + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-encoding", encoding, + "-dateOfCollection", dateOfCollection, + "-provenance", provenance, + "-xpath", xpath, + "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2), + "-readMdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), + "-workflowId", "abc" + }); + + verify(mdStoreV2); + } + + + //@Test + @Order(3) + public void testTransformSparkJob() throws Exception { + + MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); + MDStoreVersion mdStoreCleanedVersion = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreCleanedVersion.json"); + + TransformSparkJobNode.main(new String[]{ + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-dateOfTransformation", dateOfCollection, + "-mdstoreInputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2), + "-mdstoreOutputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreCleanedVersion), + "-transformationPlugin", "XSLT_TRANSFORM", + "-isLookupUrl", "https://dev-openaire.d4science.org/is/services/isLookUp", + "-transformationRuleId", "183dde52-a69b-4db9-a07e-1ef2be105294_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="}); + + } + + protected void verify(MDStoreVersion mdStoreVersion) throws IOException { + Assertions.assertTrue(new File(mdStoreVersion.getHdfsPath()).exists()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + long seqFileSize = sc + .sequenceFile(mdStoreVersion.getHdfsPath() + "/sequence_file", IntWritable.class, Text.class) + .count(); + + final Dataset mdstore = spark.read().load(mdStoreVersion.getHdfsPath() + "/store").as(encoder); + long mdStoreSize = mdstore.count(); + + long declaredSize = Long.parseLong(IOUtils.toString(new FileReader(mdStoreVersion.getHdfsPath() + "/size"))); + + Assertions.assertEquals(seqFileSize, declaredSize, "the size must be equal"); + Assertions.assertEquals(seqFileSize, mdStoreSize, "the size must be equal"); + + long uniqueIds = mdstore + .map((MapFunction) MetadataRecord::getId, Encoders.STRING()) + .distinct() + .count(); + + Assertions.assertEquals(seqFileSize, uniqueIds, "the size must be equal"); + } + + private MDStoreVersion prepareVersion(String filename) throws IOException { + MDStoreVersion mdstore = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResource(filename)), MDStoreVersion.class); + mdstore.setHdfsPath(String.format(mdstore.getHdfsPath(), workingDir.toString())); + return mdstore; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java deleted file mode 100644 index 715ad8fa6f..0000000000 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java +++ /dev/null @@ -1,169 +0,0 @@ - -package eu.dnetlib.dhp.collection; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.Text; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoder; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; - -@TestMethodOrder(MethodOrderer.OrderAnnotation.class) -public class GenerateNativeStoreSparkJobTest { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private static SparkSession spark; - - private static Path workingDir; - - private static Encoder encoder; - - private static final String encoding = "XML"; - private static final String dateOfCollection = System.currentTimeMillis() + ""; - private static final String xpath = "//*[local-name()='header']/*[local-name()='identifier']"; - private static String provenance; - - private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJobTest.class); - - @BeforeAll - public static void beforeAll() throws IOException { - provenance = IOUtils.toString(GenerateNativeStoreSparkJobTest.class.getResourceAsStream("provenance.json")); - workingDir = Files.createTempDirectory(GenerateNativeStoreSparkJobTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); - - SparkConf conf = new SparkConf(); - - conf.setAppName(GenerateNativeStoreSparkJobTest.class.getSimpleName()); - - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - - encoder = Encoders.bean(MetadataRecord.class); - spark = SparkSession - .builder() - .appName(GenerateNativeStoreSparkJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } - - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } - - @Test - @Order(1) - public void testGenerateNativeStoreSparkJobRefresh() throws Exception { - - MDStoreVersion mdStoreV1 = prepareVersion("mdStoreVersion_1.json"); - FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); - - IOUtils - .copy( - getClass().getResourceAsStream("sequence_file"), - new FileOutputStream(mdStoreV1.getHdfsPath() + "/sequence_file")); - - GenerateNativeStoreSparkJob - .main( - new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-encoding", encoding, - "-dateOfCollection", dateOfCollection, - "-provenance", provenance, - "-xpath", xpath, - "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), - "-readMdStoreVersion", "", - "-workflowId", "abc" - }); - - verify(mdStoreV1); - } - - @Test - @Order(2) - public void testGenerateNativeStoreSparkJobIncremental() throws Exception { - - MDStoreVersion mdStoreV2 = prepareVersion("mdStoreVersion_2.json"); - FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); - - IOUtils - .copy( - getClass().getResourceAsStream("sequence_file"), - new FileOutputStream(mdStoreV2.getHdfsPath() + "/sequence_file")); - - MDStoreVersion mdStoreV1 = prepareVersion("mdStoreVersion_1.json"); - - GenerateNativeStoreSparkJob - .main( - new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-encoding", encoding, - "-dateOfCollection", dateOfCollection, - "-provenance", provenance, - "-xpath", xpath, - "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2), - "-readMdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), - "-workflowId", "abc" - }); - - verify(mdStoreV2); - } - - protected void verify(MDStoreVersion mdStoreVersion) throws IOException { - Assertions.assertTrue(new File(mdStoreVersion.getHdfsPath()).exists()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - long seqFileSize = sc - .sequenceFile(mdStoreVersion.getHdfsPath() + "/sequence_file", IntWritable.class, Text.class) - .count(); - - final Dataset mdstore = spark.read().load(mdStoreVersion.getHdfsPath() + "/store").as(encoder); - long mdStoreSize = mdstore.count(); - - long declaredSize = Long.parseLong(IOUtils.toString(new FileReader(mdStoreVersion.getHdfsPath() + "/size"))); - - Assertions.assertEquals(seqFileSize, declaredSize, "the size must be equal"); - Assertions.assertEquals(seqFileSize, mdStoreSize, "the size must be equal"); - - long uniqueIds = mdstore - .map((MapFunction) MetadataRecord::getId, Encoders.STRING()) - .distinct() - .count(); - - Assertions.assertEquals(seqFileSize, uniqueIds, "the size must be equal"); - } - - private MDStoreVersion prepareVersion(String filename) throws IOException { - MDStoreVersion mdstore = OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResource(filename)), MDStoreVersion.class); - mdstore.setHdfsPath(String.format(mdstore.getHdfsPath(), workingDir.toString())); - return mdstore; - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 6a80e01e28..9e46b5f953 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -38,6 +38,7 @@ import eu.dnetlib.dhp.collection.CollectionJobTest; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -74,6 +75,9 @@ public class TransformationJobTest { spark.stop(); } + + + @Test @DisplayName("Test Transform Single XML using XSLTTransformator") public void testTransformSaxonHE() throws Exception { diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreCleanedVersion.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreCleanedVersion.json new file mode 100644 index 0000000000..a5adc8fda2 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/mdStoreCleanedVersion.json @@ -0,0 +1,9 @@ +{ + "id":"md-cleaned", + "mdstore":"md-cleaned", + "writing":false, + "readCount":1, + "lastUpdate":1612187563099, + "size":71, + "hdfsPath":"%s/mdstore/md-cleaned" +} \ No newline at end of file From 75807ea5ae69a1776b65ff3b31a18db127e80835 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 2 Feb 2021 12:28:21 +0100 Subject: [PATCH 11/17] factored out constants --- .../common/AggregationConstants.java | 15 +++++ .../common/AggregationUtility.java | 3 + .../GenerateNativeStoreSparkJob.java | 55 +++++++------------ .../worker/CollectorWorkerApplication.java | 4 +- .../transformation/TransformSparkJobNode.java | 32 +++++++---- .../dhp/aggregation/AggregationJobTest.java | 2 +- 6 files changed, 63 insertions(+), 48 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationConstants.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationConstants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationConstants.java new file mode 100644 index 0000000000..15e0bb454c --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationConstants.java @@ -0,0 +1,15 @@ +package eu.dnetlib.dhp.aggregation.common; + +public class AggregationConstants { + + public static final String SEQUENCE_FILE_NAME = "/sequence_file"; + public static final String MDSTORE_DATA_PATH = "/store"; + public static final String MDSTORE_SIZE_PATH = "/size"; + + public static final String CONTENT_TOTALITEMS = "TotalItems"; + public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; + public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; + + + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java index eb971c4754..d657dee027 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java @@ -5,6 +5,7 @@ import java.io.BufferedOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -21,6 +22,8 @@ public class AggregationUtility { private static final Logger log = LoggerFactory.getLogger(AggregationUtility.class); + public static final ObjectMapper MAPPER = new ObjectMapper(); + public static void writeTotalSizeOnHDFS(final SparkSession spark, final Long total, final String path) throws IOException { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index bbed36a9c3..13813623cd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -1,15 +1,11 @@ package eu.dnetlib.dhp.collection; -import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Objects; -import java.util.Optional; - +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.model.mdstore.Provenance; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.IntWritable; @@ -26,26 +22,22 @@ import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.worker.CollectorWorkerApplication; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; -import eu.dnetlib.dhp.model.mdstore.Provenance; -import net.sf.saxon.expr.Component; import scala.Tuple2; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Objects; +import java.util.Optional; + +import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; +import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + public class GenerateNativeStoreSparkJob { private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class); - private static final ObjectMapper MAPPER = new ObjectMapper(); - - private static final String DATASET_NAME = "/store"; - public static void main(String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -88,11 +80,6 @@ public class GenerateNativeStoreSparkJob { log.info("isSparkSessionManaged: {}", isSparkSessionManaged); SparkConf conf = new SparkConf(); - /* - * conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf .registerKryoClasses( new - * Class[] { MetadataRecord.class, Provenance.class }); - */ - runWithSparkSession( conf, isSparkSessionManaged, @@ -109,10 +96,10 @@ public class GenerateNativeStoreSparkJob { MDStoreVersion readVersion) throws IOException { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems"); - final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords"); + final LongAccumulator totalItems = sc.sc().longAccumulator(CONTENT_TOTALITEMS); + final LongAccumulator invalidRecords = sc.sc().longAccumulator(CONTENT_INVALIDRECORDS); - final String seqFilePath = currentVersion.getHdfsPath() + CollectorWorkerApplication.SEQUENCE_FILE_NAME; + final String seqFilePath = currentVersion.getHdfsPath() + SEQUENCE_FILE_NAME; final JavaRDD nativeStore = sc .sequenceFile(seqFilePath, IntWritable.class, Text.class) .map( @@ -130,13 +117,13 @@ public class GenerateNativeStoreSparkJob { final Encoder encoder = Encoders.bean(MetadataRecord.class); final Dataset mdstore = spark.createDataset(nativeStore.rdd(), encoder); - final String targetPath = currentVersion.getHdfsPath() + DATASET_NAME; + final String targetPath = currentVersion.getHdfsPath() + MDSTORE_DATA_PATH; if (readVersion != null) { // INCREMENTAL MODE log.info("updating {} incrementally with {}", targetPath, readVersion.getHdfsPath()); Dataset currentMdStoreVersion = spark .read() - .load(readVersion.getHdfsPath() + DATASET_NAME) + .load(readVersion.getHdfsPath() + MDSTORE_DATA_PATH) .as(encoder); TypedColumn aggregator = new MDStoreAggregator().toColumn(); @@ -159,7 +146,7 @@ public class GenerateNativeStoreSparkJob { final Long total = spark.read().load(targetPath).count(); log.info("collected {} records for datasource '{}'", total, provenance.getDatasourceName()); - writeTotalSizeOnHDFS(spark, total, currentVersion.getHdfsPath() + "/size"); + writeTotalSizeOnHDFS(spark, total, currentVersion.getHdfsPath() + MDSTORE_SIZE_PATH); } public static class MDStoreAggregator extends Aggregator { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java index e24b9ad1da..da5b197d64 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.collection.worker; +import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; + import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,8 +27,6 @@ public class CollectorWorkerApplication { private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory(); - public static String SEQUENCE_FILE_NAME = "/sequence_file"; - /** * @param args */ diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index 193da38788..f8ddf47e2d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -2,14 +2,17 @@ package eu.dnetlib.dhp.transformation; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; +import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; import java.io.IOException; import java.util.Map; import java.util.Optional; +import eu.dnetlib.dhp.aggregation.common.AggregationConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; + import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; @@ -76,29 +79,36 @@ public class TransformSparkJobNode { conf, isSparkSessionManaged, spark -> transformRecords( - parser.getObjectMap(), isLookupService, spark, nativeMdStoreVersion.getHdfsPath() + "/store", - cleanedMdStoreVersion.getHdfsPath() + "/store")); + parser.getObjectMap(), isLookupService, spark, nativeMdStoreVersion.getHdfsPath() + MDSTORE_DATA_PATH, + cleanedMdStoreVersion.getHdfsPath() + MDSTORE_DATA_PATH)); } public static void transformRecords(final Map args, final ISLookUpService isLookUpService, final SparkSession spark, final String inputPath, final String outputPath) throws DnetTransformationException, IOException { - final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems"); - final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems"); - final LongAccumulator transformedItems = spark.sparkContext().longAccumulator("transformedItems"); + final LongAccumulator totalItems = spark.sparkContext().longAccumulator(CONTENT_TOTALITEMS); + final LongAccumulator errorItems = spark.sparkContext().longAccumulator(CONTENT_INVALIDRECORDS); + final LongAccumulator transformedItems = spark.sparkContext().longAccumulator(CONTENT_TRANSFORMEDRECORDS); final AggregationCounter ct = new AggregationCounter(totalItems, errorItems, transformedItems); final Encoder encoder = Encoders.bean(MetadataRecord.class); - final Dataset mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder); - final MapFunction XSLTTransformationFunction = TransformationFactory - .getTransformationPlugin(args, ct, isLookUpService); - mdstoreInput.map(XSLTTransformationFunction, encoder).write().save(outputPath + "/store"); + + saveDataset( + spark.read() + .format("parquet") + .load(inputPath) + .as(encoder) + .map( + TransformationFactory.getTransformationPlugin(args, ct, isLookUpService), + encoder), + outputPath + MDSTORE_DATA_PATH); + log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Total item " + ct.getTotalItems().count()); log.info("Transformation Error item " + ct.getErrorItems().count()); - AggregationUtility.writeTotalSizeOnHDFS(spark, ct.getProcessedItems().count(), outputPath + "/size"); + writeTotalSizeOnHDFS(spark, ct.getProcessedItems().count(), outputPath + MDSTORE_SIZE_PATH); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java index c9ccbc7ff4..ac65ef6a9a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java @@ -145,7 +145,7 @@ public class AggregationJobTest { } - //@Test + @Test @Order(3) public void testTransformSparkJob() throws Exception { From bb89b99b24d4ad7e2bf05d383c87a74874af4929 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 2 Feb 2021 12:34:14 +0100 Subject: [PATCH 12/17] code formatting --- .../common/AggregationConstants.java | 15 +- .../common/AggregationUtility.java | 3 +- .../GenerateNativeStoreSparkJob.java | 32 +-- .../transformation/TransformSparkJobNode.java | 28 +- .../dhp/aggregation/AggregationJobTest.java | 250 +++++++++--------- .../transformation/TransformationJobTest.java | 3 - 6 files changed, 164 insertions(+), 167 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationConstants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationConstants.java index 15e0bb454c..7c5ad354d3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationConstants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationConstants.java @@ -1,15 +1,14 @@ + package eu.dnetlib.dhp.aggregation.common; public class AggregationConstants { - public static final String SEQUENCE_FILE_NAME = "/sequence_file"; - public static final String MDSTORE_DATA_PATH = "/store"; - public static final String MDSTORE_SIZE_PATH = "/size"; - - public static final String CONTENT_TOTALITEMS = "TotalItems"; - public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; - public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; - + public static final String SEQUENCE_FILE_NAME = "/sequence_file"; + public static final String MDSTORE_DATA_PATH = "/store"; + public static final String MDSTORE_SIZE_PATH = "/size"; + public static final String CONTENT_TOTALITEMS = "TotalItems"; + public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; + public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java index d657dee027..7332ac0715 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationUtility.java @@ -5,7 +5,6 @@ import java.io.BufferedOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -15,6 +14,8 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index 13813623cd..fdf3965d67 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -1,11 +1,16 @@ package eu.dnetlib.dhp.collection; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; -import eu.dnetlib.dhp.model.mdstore.Provenance; +import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; +import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Objects; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.IntWritable; @@ -22,18 +27,15 @@ import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.model.mdstore.Provenance; import scala.Tuple2; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Objects; -import java.util.Optional; - -import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; -import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - public class GenerateNativeStoreSparkJob { private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index f8ddf47e2d..0a01faf1ea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -1,19 +1,17 @@ package eu.dnetlib.dhp.transformation; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; +import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.saveDataset; +import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.writeTotalSizeOnHDFS; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; import java.util.Map; import java.util.Optional; -import eu.dnetlib.dhp.aggregation.common.AggregationConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; - -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; @@ -25,7 +23,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; -import eu.dnetlib.dhp.aggregation.common.AggregationUtility; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; @@ -67,7 +64,6 @@ public class TransformSparkJobNode { final String dateOfTransformation = parser.get("dateOfTransformation"); log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); - final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl); final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService); @@ -94,15 +90,15 @@ public class TransformSparkJobNode { final Encoder encoder = Encoders.bean(MetadataRecord.class); saveDataset( - spark.read() - .format("parquet") - .load(inputPath) - .as(encoder) - .map( - TransformationFactory.getTransformationPlugin(args, ct, isLookUpService), - encoder), - outputPath + MDSTORE_DATA_PATH); - + spark + .read() + .format("parquet") + .load(inputPath) + .as(encoder) + .map( + TransformationFactory.getTransformationPlugin(args, ct, isLookUpService), + encoder), + outputPath + MDSTORE_DATA_PATH); log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Total item " + ct.getTotalItems().count()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java index ac65ef6a9a..d5ecc9cb03 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/aggregation/AggregationJobTest.java @@ -12,11 +12,6 @@ import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; -import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.transformation.TransformSparkJobNode; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.IntWritable; @@ -35,163 +30,170 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; +import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.transformation.TransformSparkJobNode; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class AggregationJobTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static SparkSession spark; + private static SparkSession spark; - private static Path workingDir; + private static Path workingDir; - private static Encoder encoder; + private static Encoder encoder; - private static final String encoding = "XML"; - private static final String dateOfCollection = System.currentTimeMillis() + ""; - private static final String xpath = "//*[local-name()='header']/*[local-name()='identifier']"; - private static String provenance; + private static final String encoding = "XML"; + private static final String dateOfCollection = System.currentTimeMillis() + ""; + private static final String xpath = "//*[local-name()='header']/*[local-name()='identifier']"; + private static String provenance; - private static final Logger log = LoggerFactory.getLogger(AggregationJobTest.class); + private static final Logger log = LoggerFactory.getLogger(AggregationJobTest.class); - @BeforeAll - public static void beforeAll() throws IOException { - provenance = IOUtils.toString(AggregationJobTest.class.getResourceAsStream("/eu/dnetlib/dhp/collection/provenance.json")); - workingDir = Files.createTempDirectory(AggregationJobTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); + @BeforeAll + public static void beforeAll() throws IOException { + provenance = IOUtils + .toString(AggregationJobTest.class.getResourceAsStream("/eu/dnetlib/dhp/collection/provenance.json")); + workingDir = Files.createTempDirectory(AggregationJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); - SparkConf conf = new SparkConf(); + SparkConf conf = new SparkConf(); - conf.setAppName(AggregationJobTest.class.getSimpleName()); + conf.setAppName(AggregationJobTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - encoder = Encoders.bean(MetadataRecord.class); - spark = SparkSession - .builder() - .appName(AggregationJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + encoder = Encoders.bean(MetadataRecord.class); + spark = SparkSession + .builder() + .appName(AggregationJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } - @Test - @Order(1) - public void testGenerateNativeStoreSparkJobRefresh() throws Exception { + @Test + @Order(1) + public void testGenerateNativeStoreSparkJobRefresh() throws Exception { - MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); - FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); + MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); + FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); - IOUtils - .copy( - getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/sequence_file"), - new FileOutputStream(mdStoreV1.getHdfsPath() + "/sequence_file")); + IOUtils + .copy( + getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/sequence_file"), + new FileOutputStream(mdStoreV1.getHdfsPath() + "/sequence_file")); - GenerateNativeStoreSparkJob - .main( - new String[]{ - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-encoding", encoding, - "-dateOfCollection", dateOfCollection, - "-provenance", provenance, - "-xpath", xpath, - "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), - "-readMdStoreVersion", "", - "-workflowId", "abc" - }); + GenerateNativeStoreSparkJob + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-encoding", encoding, + "-dateOfCollection", dateOfCollection, + "-provenance", provenance, + "-xpath", xpath, + "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), + "-readMdStoreVersion", "", + "-workflowId", "abc" + }); - verify(mdStoreV1); - } + verify(mdStoreV1); + } - @Test - @Order(2) - public void testGenerateNativeStoreSparkJobIncremental() throws Exception { + @Test + @Order(2) + public void testGenerateNativeStoreSparkJobIncremental() throws Exception { - MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); - FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); + MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); + FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); - IOUtils - .copy( - getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/sequence_file"), - new FileOutputStream(mdStoreV2.getHdfsPath() + "/sequence_file")); + IOUtils + .copy( + getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/sequence_file"), + new FileOutputStream(mdStoreV2.getHdfsPath() + "/sequence_file")); - MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); + MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); - GenerateNativeStoreSparkJob - .main( - new String[]{ - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-encoding", encoding, - "-dateOfCollection", dateOfCollection, - "-provenance", provenance, - "-xpath", xpath, - "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2), - "-readMdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), - "-workflowId", "abc" - }); + GenerateNativeStoreSparkJob + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-encoding", encoding, + "-dateOfCollection", dateOfCollection, + "-provenance", provenance, + "-xpath", xpath, + "-mdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2), + "-readMdStoreVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV1), + "-workflowId", "abc" + }); - verify(mdStoreV2); - } + verify(mdStoreV2); + } + @Test + @Order(3) + public void testTransformSparkJob() throws Exception { - @Test - @Order(3) - public void testTransformSparkJob() throws Exception { + MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); + MDStoreVersion mdStoreCleanedVersion = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreCleanedVersion.json"); - MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); - MDStoreVersion mdStoreCleanedVersion = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreCleanedVersion.json"); + TransformSparkJobNode.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-dateOfTransformation", dateOfCollection, + "-mdstoreInputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2), + "-mdstoreOutputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreCleanedVersion), + "-transformationPlugin", "XSLT_TRANSFORM", + "-isLookupUrl", "https://dev-openaire.d4science.org/is/services/isLookUp", + "-transformationRuleId", + "183dde52-a69b-4db9-a07e-1ef2be105294_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU=" + }); - TransformSparkJobNode.main(new String[]{ - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-dateOfTransformation", dateOfCollection, - "-mdstoreInputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2), - "-mdstoreOutputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreCleanedVersion), - "-transformationPlugin", "XSLT_TRANSFORM", - "-isLookupUrl", "https://dev-openaire.d4science.org/is/services/isLookUp", - "-transformationRuleId", "183dde52-a69b-4db9-a07e-1ef2be105294_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="}); + } - } + protected void verify(MDStoreVersion mdStoreVersion) throws IOException { + Assertions.assertTrue(new File(mdStoreVersion.getHdfsPath()).exists()); - protected void verify(MDStoreVersion mdStoreVersion) throws IOException { - Assertions.assertTrue(new File(mdStoreVersion.getHdfsPath()).exists()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + long seqFileSize = sc + .sequenceFile(mdStoreVersion.getHdfsPath() + "/sequence_file", IntWritable.class, Text.class) + .count(); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - long seqFileSize = sc - .sequenceFile(mdStoreVersion.getHdfsPath() + "/sequence_file", IntWritable.class, Text.class) - .count(); + final Dataset mdstore = spark.read().load(mdStoreVersion.getHdfsPath() + "/store").as(encoder); + long mdStoreSize = mdstore.count(); - final Dataset mdstore = spark.read().load(mdStoreVersion.getHdfsPath() + "/store").as(encoder); - long mdStoreSize = mdstore.count(); + long declaredSize = Long.parseLong(IOUtils.toString(new FileReader(mdStoreVersion.getHdfsPath() + "/size"))); - long declaredSize = Long.parseLong(IOUtils.toString(new FileReader(mdStoreVersion.getHdfsPath() + "/size"))); + Assertions.assertEquals(seqFileSize, declaredSize, "the size must be equal"); + Assertions.assertEquals(seqFileSize, mdStoreSize, "the size must be equal"); - Assertions.assertEquals(seqFileSize, declaredSize, "the size must be equal"); - Assertions.assertEquals(seqFileSize, mdStoreSize, "the size must be equal"); + long uniqueIds = mdstore + .map((MapFunction) MetadataRecord::getId, Encoders.STRING()) + .distinct() + .count(); - long uniqueIds = mdstore - .map((MapFunction) MetadataRecord::getId, Encoders.STRING()) - .distinct() - .count(); + Assertions.assertEquals(seqFileSize, uniqueIds, "the size must be equal"); + } - Assertions.assertEquals(seqFileSize, uniqueIds, "the size must be equal"); - } - - private MDStoreVersion prepareVersion(String filename) throws IOException { - MDStoreVersion mdstore = OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResource(filename)), MDStoreVersion.class); - mdstore.setHdfsPath(String.format(mdstore.getHdfsPath(), workingDir.toString())); - return mdstore; - } + private MDStoreVersion prepareVersion(String filename) throws IOException { + MDStoreVersion mdstore = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResource(filename)), MDStoreVersion.class); + mdstore.setHdfsPath(String.format(mdstore.getHdfsPath(), workingDir.toString())); + return mdstore; + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 9e46b5f953..d03c3acef4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -75,9 +75,6 @@ public class TransformationJobTest { spark.stop(); } - - - @Test @DisplayName("Test Transform Single XML using XSLTTransformator") public void testTransformSaxonHE() throws Exception { From ca4391aa1c5c03ecb0477fa287b77da97e3f9c8b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 2 Feb 2021 12:44:04 +0100 Subject: [PATCH 13/17] minor changes --- .../transformation/TransformSparkJobNode.java | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index 0a01faf1ea..51f69de101 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -2,8 +2,7 @@ package eu.dnetlib.dhp.transformation; import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; -import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.saveDataset; -import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.writeTotalSizeOnHDFS; +import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; @@ -19,8 +18,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -52,11 +49,14 @@ public class TransformSparkJobNode { final String mdstoreInputVersion = parser.get("mdstoreInputVersion"); final String mdstoreOutputVersion = parser.get("mdstoreOutputVersion"); - // TODO this variable will be used after implementing Messaging with DNet Aggregator - final ObjectMapper jsonMapper = new ObjectMapper(); - final MDStoreVersion nativeMdStoreVersion = jsonMapper.readValue(mdstoreInputVersion, MDStoreVersion.class); - final MDStoreVersion cleanedMdStoreVersion = jsonMapper.readValue(mdstoreOutputVersion, MDStoreVersion.class); + final MDStoreVersion nativeMdStoreVersion = MAPPER.readValue(mdstoreInputVersion, MDStoreVersion.class); + final String inputPath = nativeMdStoreVersion.getHdfsPath() + MDSTORE_DATA_PATH; + log.info("input path: {}", inputPath); + + final MDStoreVersion cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, MDStoreVersion.class); + final String outputPath = cleanedMdStoreVersion.getHdfsPath() + MDSTORE_DATA_PATH; + log.info("output path: {}", outputPath); final String isLookupUrl = parser.get("isLookupUrl"); log.info(String.format("isLookupUrl: %s", isLookupUrl)); @@ -74,9 +74,10 @@ public class TransformSparkJobNode { runWithSparkSession( conf, isSparkSessionManaged, - spark -> transformRecords( - parser.getObjectMap(), isLookupService, spark, nativeMdStoreVersion.getHdfsPath() + MDSTORE_DATA_PATH, - cleanedMdStoreVersion.getHdfsPath() + MDSTORE_DATA_PATH)); + spark -> { + transformRecords( + parser.getObjectMap(), isLookupService, spark, inputPath, outputPath); + }); } public static void transformRecords(final Map args, final ISLookUpService isLookUpService, From bde14b149a5e1d5eb249ef80db9d6d1a10d670a7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 2 Feb 2021 12:49:29 +0100 Subject: [PATCH 14/17] fixed transformation target paths --- .../transformation/TransformSparkJobNode.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index 51f69de101..e1830ed28e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -11,6 +11,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; @@ -52,11 +53,11 @@ public class TransformSparkJobNode { final MDStoreVersion nativeMdStoreVersion = MAPPER.readValue(mdstoreInputVersion, MDStoreVersion.class); final String inputPath = nativeMdStoreVersion.getHdfsPath() + MDSTORE_DATA_PATH; - log.info("input path: {}", inputPath); + log.info("inputPath: {}", inputPath); final MDStoreVersion cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, MDStoreVersion.class); - final String outputPath = cleanedMdStoreVersion.getHdfsPath() + MDSTORE_DATA_PATH; - log.info("output path: {}", outputPath); + final String outputBasePath = cleanedMdStoreVersion.getHdfsPath(); + log.info("outputBasePath: {}", outputBasePath); final String isLookupUrl = parser.get("isLookupUrl"); log.info(String.format("isLookupUrl: %s", isLookupUrl)); @@ -76,12 +77,12 @@ public class TransformSparkJobNode { isSparkSessionManaged, spark -> { transformRecords( - parser.getObjectMap(), isLookupService, spark, inputPath, outputPath); + parser.getObjectMap(), isLookupService, spark, inputPath, outputBasePath); }); } public static void transformRecords(final Map args, final ISLookUpService isLookUpService, - final SparkSession spark, final String inputPath, final String outputPath) + final SparkSession spark, final String inputPath, final String outputBasePath) throws DnetTransformationException, IOException { final LongAccumulator totalItems = spark.sparkContext().longAccumulator(CONTENT_TOTALITEMS); @@ -90,22 +91,21 @@ public class TransformSparkJobNode { final AggregationCounter ct = new AggregationCounter(totalItems, errorItems, transformedItems); final Encoder encoder = Encoders.bean(MetadataRecord.class); - saveDataset( - spark + final Dataset mdstore = spark .read() .format("parquet") .load(inputPath) .as(encoder) .map( - TransformationFactory.getTransformationPlugin(args, ct, isLookUpService), - encoder), - outputPath + MDSTORE_DATA_PATH); + TransformationFactory.getTransformationPlugin(args, ct, isLookUpService), + encoder); + saveDataset(mdstore, outputBasePath + MDSTORE_DATA_PATH); log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Total item " + ct.getTotalItems().count()); log.info("Transformation Error item " + ct.getErrorItems().count()); - writeTotalSizeOnHDFS(spark, ct.getProcessedItems().count(), outputPath + MDSTORE_SIZE_PATH); + writeTotalSizeOnHDFS(spark, mdstore.count(), outputBasePath + MDSTORE_SIZE_PATH); } } From ac46c247d2261c2dc2a1c5845d6355ca5088537f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 2 Feb 2021 14:24:00 +0100 Subject: [PATCH 15/17] code formatting --- .../dhp/transformation/TransformSparkJobNode.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index e1830ed28e..e1b1b849c4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -92,13 +92,13 @@ public class TransformSparkJobNode { final Encoder encoder = Encoders.bean(MetadataRecord.class); final Dataset mdstore = spark - .read() - .format("parquet") - .load(inputPath) - .as(encoder) - .map( - TransformationFactory.getTransformationPlugin(args, ct, isLookUpService), - encoder); + .read() + .format("parquet") + .load(inputPath) + .as(encoder) + .map( + TransformationFactory.getTransformationPlugin(args, ct, isLookUpService), + encoder); saveDataset(mdstore, outputBasePath + MDSTORE_DATA_PATH); log.info("Transformed item " + ct.getProcessedItems().count()); From 53884d12c29d8ba746c4e6ebb68492b4212a1c45 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 2 Feb 2021 14:38:03 +0100 Subject: [PATCH 16/17] code formatting --- .../dhp/collection/oozie_app/workflow.xml | 19 +++---------------- .../dhp/transformation/oozie_app/workflow.xml | 11 ++++------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml index 9c213bee5f..2b2cf9dcee 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml @@ -4,7 +4,6 @@ apiDescription A json encoding of the API Description class - dataSourceInfo A json encoding of the Datasource Info @@ -13,50 +12,43 @@ identifierPath An xpath to retrieve the metadata identifier for the generation of DNet Identifier - metadataEncoding The type of the metadata XML/JSON - timestamp The timestamp of the collection date - workflowId The identifier of the workflow - mdStoreID The identifier of the mdStore - mdStoreManagerURI The URI of the MDStore Manager - collectionMode Should be REFRESH or INCREMENTAL - + ${jobTracker} ${nameNode} - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - ${wf:conf('collectionMode') eq 'REFRESH'} @@ -77,8 +69,6 @@ - - eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode @@ -129,7 +119,6 @@ - ${wf:conf('collectionMode') eq 'REFRESH'} @@ -182,8 +171,6 @@ - - eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode @@ -195,6 +182,6 @@ - +
\ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml index 43b270eaf5..9e01936d4e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/oozie_app/workflow.xml @@ -29,11 +29,10 @@ isLookupUrl The IS lookUp service endopoint - - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -51,11 +50,11 @@ --mdStoreID${mdStoreInputId} --mdStoreManagerURI${mdStoreManagerURI} -
+ @@ -69,7 +68,6 @@ --mdStoreID${mdStoreOutputId} --mdStoreManagerURI${mdStoreManagerURI} - @@ -173,8 +171,7 @@ - - - + + \ No newline at end of file From 0e8a4f9f1acbb03d1ec8c5cefcc6caff053cb532 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 3 Feb 2021 12:33:41 +0100 Subject: [PATCH 17/17] better logging, WIP: collectorWorker error reporting --- .../dhp/application/ApplicationUtils.java | 21 +++++ .../mdstore/MDStoreActionNode.java | 32 +++---- .../collection/plugin/CollectorPlugin.java | 3 + .../plugin/oai/OaiCollectorPlugin.java | 21 ++++- .../collection/plugin/oai/OaiIterator.java | 17 +++- .../plugin/oai/OaiIteratorFactory.java | 6 +- .../collection/worker/CollectorWorker.java | 87 +++++++++---------- .../worker/CollectorWorkerApplication.java | 20 +++-- .../worker/utils/CollectorPluginFactory.java | 2 +- .../worker/utils/HttpConnector.java | 84 +++++++----------- .../DnetCollectorWorkerApplicationTests.java | 2 +- 11 files changed, 159 insertions(+), 136 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java new file mode 100644 index 0000000000..531c13af36 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java @@ -0,0 +1,21 @@ + +package eu.dnetlib.dhp.application; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.util.Properties; + +public class ApplicationUtils { + + public static void populateOOZIEEnv(final String paramName, String value) throws Exception { + File file = new File(System.getProperty("oozie.action.output.properties")); + Properties props = new Properties(); + + props.setProperty(paramName, value); + OutputStream os = new FileOutputStream(file); + props.store(os, ""); + os.close(); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java index 6cb0537b2e..3e471cfc83 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java @@ -1,6 +1,9 @@ package eu.dnetlib.dhp.aggregation.mdstore; +import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; +import static eu.dnetlib.dhp.application.ApplicationUtils.*; + import java.io.File; import java.io.FileOutputStream; import java.io.OutputStream; @@ -16,11 +19,8 @@ import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.worker.CollectorWorker; import eu.dnetlib.dhp.common.rest.DNetRestClient; public class MDStoreActionNode { @@ -28,11 +28,8 @@ public class MDStoreActionNode { enum MDAction { NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK - } - private static final ObjectMapper mapper = new ObjectMapper(); - public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; @@ -48,13 +45,13 @@ public class MDStoreActionNode { final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser( IOUtils .toString( - CollectorWorker.class + MDStoreActionNode.class .getResourceAsStream( "/eu/dnetlib/dhp/collection/mdstore_action_parameters.json"))); argumentParser.parseArgument(args); final MDAction action = MDAction.valueOf(argumentParser.get("action")); - log.info("Curren action is {}", action); + log.info("Current action is {}", action); final String mdStoreManagerURI = argumentParser.get("mdStoreManagerURI"); log.info("mdStoreManagerURI is {}", mdStoreManagerURI); @@ -67,7 +64,7 @@ public class MDStoreActionNode { } final MDStoreVersion currentVersion = DNetRestClient .doGET(String.format(NEW_VERSION_URI, mdStoreManagerURI, mdStoreID), MDStoreVersion.class); - populateOOZIEEnv(MDSTOREVERSIONPARAM, mapper.writeValueAsString(currentVersion)); + populateOOZIEEnv(MDSTOREVERSIONPARAM, MAPPER.writeValueAsString(currentVersion)); break; } case COMMIT: { @@ -77,7 +74,7 @@ public class MDStoreActionNode { throw new IllegalArgumentException("missing or empty argument namenode"); } final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); - final MDStoreVersion mdStoreVersion = mapper.readValue(mdStoreVersion_params, MDStoreVersion.class); + final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { throw new IllegalArgumentException( @@ -110,7 +107,7 @@ public class MDStoreActionNode { } case ROLLBACK: { final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); - final MDStoreVersion mdStoreVersion = mapper.readValue(mdStoreVersion_params, MDStoreVersion.class); + final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { throw new IllegalArgumentException( @@ -127,12 +124,12 @@ public class MDStoreActionNode { } final MDStoreVersion currentVersion = DNetRestClient .doGET(String.format(READ_LOCK_URL, mdStoreManagerURI, mdStoreID), MDStoreVersion.class); - populateOOZIEEnv(MDSTOREREADLOCKPARAM, mapper.writeValueAsString(currentVersion)); + populateOOZIEEnv(MDSTOREREADLOCKPARAM, MAPPER.writeValueAsString(currentVersion)); break; } case READ_UNLOCK: { final String mdStoreVersion_params = argumentParser.get("readMDStoreId"); - final MDStoreVersion mdStoreVersion = mapper.readValue(mdStoreVersion_params, MDStoreVersion.class); + final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { throw new IllegalArgumentException( @@ -148,13 +145,4 @@ public class MDStoreActionNode { } - public static void populateOOZIEEnv(final String paramName, String value) throws Exception { - File file = new File(System.getProperty("oozie.action.output.properties")); - Properties props = new Properties(); - - props.setProperty(paramName, value); - OutputStream os = new FileOutputStream(file); - props.store(os, ""); - os.close(); - } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index ba9bd662e0..a0c546858d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -4,9 +4,12 @@ package eu.dnetlib.dhp.collection.plugin; import java.util.stream.Stream; import eu.dnetlib.dhp.collection.worker.CollectorException; +import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; public interface CollectorPlugin { Stream collect(ApiDescriptor api) throws CollectorException; + + CollectorPluginErrorLogList getCollectionErrors(); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index a5e2615536..ea74919c5a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -9,12 +9,15 @@ import java.util.Spliterators; import java.util.stream.Stream; import java.util.stream.StreamSupport; +import org.jetbrains.annotations.NotNull; + import com.google.common.base.Splitter; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.worker.CollectorException; +import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; public class OaiCollectorPlugin implements CollectorPlugin { @@ -26,8 +29,19 @@ public class OaiCollectorPlugin implements CollectorPlugin { private OaiIteratorFactory oaiIteratorFactory; + private final CollectorPluginErrorLogList errorLogList = new CollectorPluginErrorLogList(); + @Override public Stream collect(final ApiDescriptor api) throws CollectorException { + try { + return doCollect(api); + } catch (CollectorException e) { + errorLogList.add(e.getMessage()); + throw e; + } + } + + private Stream doCollect(ApiDescriptor api) throws CollectorException { final String baseUrl = api.getBaseUrl(); final String mdFormat = api.getParams().get(FORMAT_PARAM); final String setParam = api.getParams().get(OAI_SET_PARAM); @@ -65,7 +79,7 @@ public class OaiCollectorPlugin implements CollectorPlugin { .stream() .map( set -> getOaiIteratorFactory() - .newIterator(baseUrl, mdFormat, set, fromDate, untilDate)) + .newIterator(baseUrl, mdFormat, set, fromDate, untilDate, errorLogList)) .iterator(); return StreamSupport @@ -79,4 +93,9 @@ public class OaiCollectorPlugin implements CollectorPlugin { } return oaiIteratorFactory; } + + @Override + public CollectorPluginErrorLogList getCollectionErrors() { + return errorLogList; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index e54bae67d4..2392dee6a8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -15,15 +15,17 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.collection.worker.CollectorException; +import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList; import eu.dnetlib.dhp.collection.worker.utils.HttpConnector; import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner; public class OaiIterator implements Iterator { - private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on - // 11/24/08 5:02 PM + private static final Logger log = LoggerFactory.getLogger(OaiIterator.class); private final Queue queue = new PriorityBlockingQueue<>(); private final SAXReader reader = new SAXReader(); @@ -36,6 +38,7 @@ public class OaiIterator implements Iterator { private String token; private boolean started; private final HttpConnector httpConnector; + private CollectorPluginErrorLogList errorLogList; public OaiIterator( final String baseUrl, @@ -43,7 +46,8 @@ public class OaiIterator implements Iterator { final String set, final String fromDate, final String untilDate, - final HttpConnector httpConnector) { + final HttpConnector httpConnector, + final CollectorPluginErrorLogList errorLogList) { this.baseUrl = baseUrl; this.mdFormat = mdFormat; this.set = set; @@ -51,6 +55,7 @@ public class OaiIterator implements Iterator { this.untilDate = untilDate; this.started = false; this.httpConnector = httpConnector; + this.errorLogList = errorLogList; } private void verifyStarted() { @@ -139,7 +144,7 @@ public class OaiIterator implements Iterator { private String downloadPage(final String url) throws CollectorException { - final String xml = httpConnector.getInputSource(url); + final String xml = httpConnector.getInputSource(url, errorLogList); Document doc; try { doc = reader.read(new StringReader(xml)); @@ -174,4 +179,8 @@ public class OaiIterator implements Iterator { return doc.valueOf("//*[local-name()='resumptionToken']"); } + + public CollectorPluginErrorLogList getErrorLogList() { + return errorLogList; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java index 4a6ea7f67b..eafd265d4f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.collection.plugin.oai; import java.util.Iterator; +import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList; import eu.dnetlib.dhp.collection.worker.utils.HttpConnector; public class OaiIteratorFactory { @@ -14,8 +15,9 @@ public class OaiIteratorFactory { final String mdFormat, final String set, final String fromDate, - final String untilDate) { - return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector()); + final String untilDate, + final CollectorPluginErrorLogList errorLogList) { + return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector(), errorLogList); } private HttpConnector getHttpConnector() { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java index 3605bdfd6c..7033cfd8e2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java @@ -15,6 +15,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; @@ -22,69 +23,65 @@ public class CollectorWorker { private static final Logger log = LoggerFactory.getLogger(CollectorWorker.class); - private final CollectorPluginFactory collectorPluginFactory; - private final ApiDescriptor api; private final String hdfsuri; private final String hdfsPath; + private CollectorPlugin plugin; + public CollectorWorker( - final CollectorPluginFactory collectorPluginFactory, final ApiDescriptor api, final String hdfsuri, - final String hdfsPath) { - this.collectorPluginFactory = collectorPluginFactory; + final String hdfsPath) throws CollectorException { this.api = api; this.hdfsuri = hdfsuri; this.hdfsPath = hdfsPath; - + this.plugin = CollectorPluginFactory.getPluginByProtocol(api.getProtocol()); } - public void collect() throws CollectorException { - try { - final CollectorPlugin plugin = collectorPluginFactory.getPluginByProtocol(api.getProtocol()); + public CollectorPluginErrorLogList collect() throws IOException, CollectorException { - // ====== Init HDFS File System Object - Configuration conf = new Configuration(); - // Set FileSystem URI - conf.set("fs.defaultFS", hdfsuri); - // Because of Maven - conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); - conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); + // ====== Init HDFS File System Object + Configuration conf = new Configuration(); + // Set FileSystem URI + conf.set("fs.defaultFS", hdfsuri); + // Because of Maven + conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); + conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - System.setProperty("hadoop.home.dir", "/"); - // Get the filesystem - HDFS - FileSystem.get(URI.create(hdfsuri), conf); - Path hdfswritepath = new Path(hdfsPath); + System.setProperty("hadoop.home.dir", "/"); + // Get the filesystem - HDFS - log.info("Created path " + hdfswritepath.toString()); + FileSystem.get(URI.create(hdfsuri), conf); + Path hdfswritepath = new Path(hdfsPath); - final AtomicInteger counter = new AtomicInteger(0); - try (SequenceFile.Writer writer = SequenceFile - .createWriter( - conf, - SequenceFile.Writer.file(hdfswritepath), - SequenceFile.Writer.keyClass(IntWritable.class), - SequenceFile.Writer.valueClass(Text.class))) { - final IntWritable key = new IntWritable(counter.get()); - final Text value = new Text(); - plugin - .collect(api) - .forEach( - content -> { - key.set(counter.getAndIncrement()); - value.set(content); - try { - writer.append(key, value); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } - } catch (Throwable e) { - throw new CollectorException("Error on collecting ", e); + log.info("Created path " + hdfswritepath.toString()); + + final AtomicInteger counter = new AtomicInteger(0); + try (SequenceFile.Writer writer = SequenceFile + .createWriter( + conf, + SequenceFile.Writer.file(hdfswritepath), + SequenceFile.Writer.keyClass(IntWritable.class), + SequenceFile.Writer.valueClass(Text.class))) { + final IntWritable key = new IntWritable(counter.get()); + final Text value = new Text(); + plugin + .collect(api) + .forEach( + content -> { + key.set(counter.getAndIncrement()); + value.set(content); + try { + writer.append(key, value); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } finally { + return plugin.getCollectionErrors(); } } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java index da5b197d64..1d99689db2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java @@ -2,6 +2,8 @@ package eu.dnetlib.dhp.collection.worker; import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; +import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*; +import static eu.dnetlib.dhp.application.ApplicationUtils.*; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; @@ -10,7 +12,9 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; +import eu.dnetlib.dhp.aggregation.common.AggregationUtility; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; @@ -25,8 +29,6 @@ public class CollectorWorkerApplication { private static final Logger log = LoggerFactory.getLogger(CollectorWorkerApplication.class); - private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory(); - /** * @param args */ @@ -49,14 +51,16 @@ public class CollectorWorkerApplication { final String mdStoreVersion = argumentParser.get("mdStoreVersion"); log.info("mdStoreVersion is {}", mdStoreVersion); - final ObjectMapper jsonMapper = new ObjectMapper(); + final MDStoreVersion currentVersion = MAPPER.readValue(mdStoreVersion, MDStoreVersion.class); + final String hdfsPath = currentVersion.getHdfsPath() + SEQUENCE_FILE_NAME; + log.info("hdfs path is {}", hdfsPath); - final MDStoreVersion currentVersion = jsonMapper.readValue(mdStoreVersion, MDStoreVersion.class); + final ApiDescriptor api = MAPPER.readValue(apiDescriptor, ApiDescriptor.class); - final ApiDescriptor api = jsonMapper.readValue(apiDescriptor, ApiDescriptor.class); - final CollectorWorker worker = new CollectorWorker(collectorPluginFactory, api, hdfsuri, - currentVersion.getHdfsPath() + SEQUENCE_FILE_NAME); - worker.collect(); + final CollectorWorker worker = new CollectorWorker(api, hdfsuri, hdfsPath); + CollectorPluginErrorLogList errors = worker.collect(); + + populateOOZIEEnv("collectorErrors", errors.toString()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java index 6b070b191c..7cbcd9b5c2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java @@ -7,7 +7,7 @@ import eu.dnetlib.dhp.collection.worker.CollectorException; public class CollectorPluginFactory { - public CollectorPlugin getPluginByProtocol(final String protocol) throws CollectorException { + public static CollectorPlugin getPluginByProtocol(final String protocol) throws CollectorException { if (protocol == null) throw new CollectorException("protocol cannot be null"); switch (protocol.toLowerCase().trim()) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java index ff3c18abad..fc45b4814d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java @@ -16,14 +16,14 @@ import javax.net.ssl.X509TrustManager; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.math.NumberUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.collection.worker.CollectorException; public class HttpConnector { - private static final Log log = LogFactory.getLog(HttpConnector.class); + private static final Logger log = LoggerFactory.getLogger(HttpConnector.class); private int maxNumberOfRetry = 6; private int defaultDelay = 120; // seconds @@ -45,7 +45,20 @@ public class HttpConnector { * @throws CollectorException when retrying more than maxNumberOfRetry times */ public String getInputSource(final String requestUrl) throws CollectorException { - return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList()); + return attemptDownloadAsString(requestUrl, 1, new CollectorPluginErrorLogList()); + } + + /** + * Given the URL returns the content via HTTP GET + * + * @param requestUrl the URL + * @param errorLogList the list of errors + * @return the content of the downloaded resource + * @throws CollectorException when retrying more than maxNumberOfRetry times + */ + public String getInputSource(final String requestUrl, CollectorPluginErrorLogList errorLogList) + throws CollectorException { + return attemptDownloadAsString(requestUrl, 1, errorLogList); } /** @@ -59,18 +72,20 @@ public class HttpConnector { return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); } - private String attemptDownlaodAsString( + private String attemptDownloadAsString( final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList) throws CollectorException { + + log.info("requesting URL [{}]", requestUrl); try { final InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); try { return IOUtils.toString(s); } catch (final IOException e) { - log.error("error while retrieving from http-connection occured: " + requestUrl, e); + log.error("error while retrieving from http-connection occurred: {}", requestUrl, e); Thread.sleep(defaultDelay * 1000); errorList.add(e.getMessage()); - return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList); + return attemptDownloadAsString(requestUrl, retryNumber + 1, errorList); } finally { IOUtils.closeQuietly(s); } @@ -87,7 +102,7 @@ public class HttpConnector { throw new CollectorException("Max number of retries exceeded. Cause: \n " + errorList); } - log.debug("Downloading " + requestUrl + " - try: " + retryNumber); + log.debug("requesting URL [{}], try {}", requestUrl, retryNumber); try { InputStream input = null; @@ -103,7 +118,7 @@ public class HttpConnector { final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) { - log.warn("waiting and repeating request after " + retryAfter + " sec."); + log.warn("waiting and repeating request after {} sec.", retryAfter); Thread.sleep(retryAfter * 1000); errorList.add("503 Service Unavailable"); urlConn.disconnect(); @@ -111,7 +126,7 @@ public class HttpConnector { } else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM || urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) { final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); - log.debug("The requested url has been moved to " + newUrl); + log.debug("The requested url has been moved to {}", newUrl); errorList .add( String @@ -121,15 +136,11 @@ public class HttpConnector { urlConn.disconnect(); return attemptDownload(newUrl, retryNumber + 1, errorList); } else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) { - log - .error( - String - .format( - "HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage())); + final String msg = String + .format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()); + log.error(msg); Thread.sleep(defaultDelay * 1000); - errorList - .add( - String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage())); + errorList.add(msg); urlConn.disconnect(); return attemptDownload(requestUrl, retryNumber + 1, errorList); } else { @@ -138,7 +149,7 @@ public class HttpConnector { return input; } } catch (final IOException e) { - log.error("error while retrieving from http-connection occured: " + requestUrl, e); + log.error("error while retrieving from http-connection occurred: {}", requestUrl, e); Thread.sleep(defaultDelay * 1000); errorList.add(e.getMessage()); return attemptDownload(requestUrl, retryNumber + 1, errorList); @@ -149,12 +160,12 @@ public class HttpConnector { } private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.debug("StatusCode: " + urlConn.getResponseMessage()); + log.debug("StatusCode: {}", urlConn.getResponseMessage()); for (final Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { for (final String v : e.getValue()) { - log.debug(" key: " + e.getKey() + " - value: " + v); + log.debug(" key: {} value: {}", e.getKey(), v); } } } @@ -183,37 +194,6 @@ public class HttpConnector { "The requested url has been MOVED, but 'location' param is MISSING"); } - /** - * register for https scheme; this is a workaround and not intended for the use in trusted environments - */ - public void initTrustManager() { - final X509TrustManager tm = new X509TrustManager() { - - @Override - public void checkClientTrusted(final X509Certificate[] xcs, final String string) { - } - - @Override - public void checkServerTrusted(final X509Certificate[] xcs, final String string) { - } - - @Override - public X509Certificate[] getAcceptedIssuers() { - return null; - } - }; - try { - final SSLContext ctx = SSLContext.getInstance("TLS"); - ctx.init(null, new TrustManager[] { - tm - }, null); - HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory()); - } catch (final GeneralSecurityException e) { - log.fatal(e); - throw new IllegalStateException(e); - } - } - public int getMaxNumberOfRetry() { return maxNumberOfRetry; } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java index 9abfbacacf..10964096cc 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java @@ -40,7 +40,7 @@ public class DnetCollectorWorkerApplicationTests { public void testFeeding(@TempDir Path testDir) throws Exception { System.out.println(testDir.toString()); - CollectorWorker worker = new CollectorWorker(new CollectorPluginFactory(), getApi(), + CollectorWorker worker = new CollectorWorker(getApi(), "file://" + testDir.toString() + "/file.seq", testDir.toString() + "/file.seq"); worker.collect();