From f249f9d00c1c5c4a437d4904803284667bc98a54 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 11 Apr 2022 16:04:27 +0200 Subject: [PATCH] oozie workflow for migrating the native records from the mongodb-based mdstores to the hadoop-based ones, testing phase --- .../migration/MigrateNativeStoreSparkJob.java | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateNativeStoreSparkJob.java index 1da0892b3..000567bf7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateNativeStoreSparkJob.java @@ -25,6 +25,7 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; +import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.slf4j.Logger; @@ -144,10 +145,25 @@ public class MigrateNativeStoreSparkJob { .atZone(ZoneId.systemDefault()) .toLocalDate(); - final Node nativeRecord = document - .selectSingleNode("/*[local-name() = 'record']/*[local-name() = 'metadata']/*"); + document + .selectSingleNode( + "/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'objIdentifier']") + .detach(); + document + .selectSingleNode( + "/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']") + .detach(); + document + .selectSingleNode( + "/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'dateOfCollection']") + .detach(); + document + .selectSingleNode( + "/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'datasourceprefix']") + .detach(); + document.selectSingleNode("/*[local-name() = 'record']/*[local-name() = 'about']").detach(); - return new MetadataRecord(id, encoding, provenance, nativeRecord.asXML(), date.toEpochDay()); + return new MetadataRecord(id, encoding, provenance, document.asXML(), date.toEpochDay()); } catch (Throwable e) { invalidRecords.add(1); return null;