Oozie workflow for migrating the native records from the MongoDB-based mdstores to the Hadoop-based ones, testing phase

This commit is contained in:
Claudio Atzori 2022-04-11 16:04:27 +02:00
parent 998262321c
commit f249f9d00c
1 changed file with 19 additions and 3 deletions

View File

@ -25,6 +25,7 @@ import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
@ -144,10 +145,25 @@ public class MigrateNativeStoreSparkJob {
.atZone(ZoneId.systemDefault())
.toLocalDate();
final Node nativeRecord = document
.selectSingleNode("/*[local-name() = 'record']/*[local-name() = 'metadata']/*");
document
.selectSingleNode(
"/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'objIdentifier']")
.detach();
document
.selectSingleNode(
"/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']")
.detach();
document
.selectSingleNode(
"/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'dateOfCollection']")
.detach();
document
.selectSingleNode(
"/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'datasourceprefix']")
.detach();
document.selectSingleNode("/*[local-name() = 'record']/*[local-name() = 'about']").detach();
return new MetadataRecord(id, encoding, provenance, nativeRecord.asXML(), date.toEpochDay());
return new MetadataRecord(id, encoding, provenance, document.asXML(), date.toEpochDay());
} catch (Throwable e) {
invalidRecords.add(1);
return null;