Oozie workflow for migrating the native records from the MongoDB-based MDStores to the Hadoop-based ones (testing phase).
This commit is contained in:
parent
998262321c
commit
f249f9d00c
|
@ -25,6 +25,7 @@ import org.apache.spark.sql.Encoders;
|
|||
import org.apache.spark.sql.SparkSession;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -144,10 +145,25 @@ public class MigrateNativeStoreSparkJob {
|
|||
.atZone(ZoneId.systemDefault())
|
||||
.toLocalDate();
|
||||
|
||||
final Node nativeRecord = document
|
||||
.selectSingleNode("/*[local-name() = 'record']/*[local-name() = 'metadata']/*");
|
||||
document
|
||||
.selectSingleNode(
|
||||
"/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'objIdentifier']")
|
||||
.detach();
|
||||
document
|
||||
.selectSingleNode(
|
||||
"/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']")
|
||||
.detach();
|
||||
document
|
||||
.selectSingleNode(
|
||||
"/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'dateOfCollection']")
|
||||
.detach();
|
||||
document
|
||||
.selectSingleNode(
|
||||
"/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'datasourceprefix']")
|
||||
.detach();
|
||||
document.selectSingleNode("/*[local-name() = 'record']/*[local-name() = 'about']").detach();
|
||||
|
||||
return new MetadataRecord(id, encoding, provenance, nativeRecord.asXML(), date.toEpochDay());
|
||||
return new MetadataRecord(id, encoding, provenance, document.asXML(), date.toEpochDay());
|
||||
} catch (Throwable e) {
|
||||
invalidRecords.add(1);
|
||||
return null;
|
||||
|
|
Loading…
Reference in New Issue