From fbb0fc140b7f8b5e3d16c78d7df8fdbd92e8b5f3 Mon Sep 17 00:00:00 2001 From: Michele Artini Date: Tue, 4 Feb 2020 15:25:47 +0100 Subject: [PATCH] partial implementation of migration --- ...on.java => AbstractMigrationExecutor.java} | 4 +- .../dhp/migration/AbstractMongoExecutor.java | 369 ++++++++++++++++++ .../MigrateDbEntitiesApplication.java | 2 +- .../MigrateMongoMdstoresApplication.java | 339 +--------------- .../dnetlib/dhp/migration/MigrationUtils.java | 154 -------- .../dhp/migration/OafMigrationExecutor.java | 246 ++++++++++++ 6 files changed, 626 insertions(+), 488 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/{AbstractMigrateApplication.java => AbstractMigrationExecutor.java} (97%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMongoExecutor.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrationUtils.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OafMigrationExecutor.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMigrateApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMigrationExecutor.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMigrateApplication.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMigrationExecutor.java index 73ee7f822..389790511 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMigrateApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMigrationExecutor.java @@ -30,7 +30,7 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.utils.DHPUtils; -public class AbstractMigrateApplication implements Closeable { +public class AbstractMigrationExecutor implements Closeable { private final AtomicInteger counter = new AtomicInteger(0); @@ -42,7 +42,7 @@ public class AbstractMigrateApplication implements Closeable { private final SequenceFile.Writer writer; - public AbstractMigrateApplication(final String hdfsPath, final String hdfsNameNode, final String hdfsUser) throws Exception { + public AbstractMigrationExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser) throws Exception { this.writer = SequenceFile.createWriter(getConf(hdfsNameNode, hdfsUser), SequenceFile.Writer.file(new Path(hdfsPath)), SequenceFile.Writer .keyClass(IntWritable.class), SequenceFile.Writer.valueClass(Text.class)); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMongoExecutor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMongoExecutor.java new file mode 100644 index 000000000..51c39824a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMongoExecutor.java @@ -0,0 +1,369 @@ +package eu.dnetlib.dhp.migration; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.lang3.StringUtils; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.DocumentFactory; +import org.dom4j.DocumentHelper; +import org.dom4j.Node; + +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.GeoLocation; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.OAIProvenance; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor { + + protected final Map code2name = new HashMap<>(); + + protected final MdstoreClient mdstoreClient; + + protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + + protected static final Qualifier PUBLICATION_RESULTTYPE_QUALIFIER = + qualifier("publication", "publication", "dnet:result_typologies", "dnet:result_typologies"); + protected static final Qualifier DATASET_RESULTTYPE_QUALIFIER = qualifier("dataset", "dataset", "dnet:result_typologies", "dnet:result_typologies"); + protected static final Qualifier SOFTWARE_RESULTTYPE_QUALIFIER = qualifier("software", "software", "dnet:result_typologies", "dnet:result_typologies"); + protected static final Qualifier OTHER_RESULTTYPE_QUALIFIER = qualifier("other", "other", "dnet:result_typologies", "dnet:result_typologies"); + + public AbstractMongoExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl, + final String mongoDb, final String dbUrl, final String dbUser, + final String dbPassword) throws Exception { + + super(hdfsPath, hdfsNameNode, hdfsUser); + + this.mdstoreClient = new MdstoreClient(mongoBaseUrl, mongoDb); + loadClassNames(dbUrl, dbUser, dbPassword); + + final Map nsContext = new HashMap<>(); + + registerNamespaces(nsContext); + nsContext.put("dc", "http://purl.org/dc/elements/1.1/"); + nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr"); + nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri"); + nsContext.put("oaf", "http://namespace.openaire.eu/oaf"); + nsContext.put("oai", "http://www.openarchives.org/OAI/2.0/"); + nsContext.put("prov", "http://www.openarchives.org/OAI/2.0/provenance"); + DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); + } + + private void loadClassNames(final String dbUrl, final String dbUser, final String dbPassword) throws IOException { + try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) { + code2name.clear(); + dbClient.processResults("select code, name from class", rs -> { + try { + code2name.put(rs.getString("code"), rs.getString("name")); + } catch (final SQLException e) { + e.printStackTrace(); + } + }); + } + + } + + public void processMdRecords(final String mdFormat, final String mdLayout, final String mdInterpretation) throws DocumentException { + + for (final Entry entry : mdstoreClient.validCollections(mdFormat, mdLayout, mdInterpretation).entrySet()) { + // final String mdId = entry.getKey(); + final String currentColl = entry.getValue(); + + for (final String xml : mdstoreClient.listRecords(currentColl)) { + final Document doc = DocumentHelper.parseText(xml); + + final String type = doc.valueOf("//dr:CobjCategory/@type"); + final KeyValue collectedFrom = keyValue(doc.valueOf("//oaf:collectedFrom/@id"), doc.valueOf("//oaf:collectedFrom/@name")); + final DataInfo info = prepareDataInfo(doc); + final long lastUpdateTimestamp = new Date().getTime(); + + for (final Oaf oaf : createOafs(doc, type, collectedFrom, info, lastUpdateTimestamp)) { + emitOaf(oaf); + } + } + } + } + + protected abstract void registerNamespaces(Map nsContext); + + protected List createOafs(final Document doc, final String type, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) { + + final List oafs = new ArrayList<>(); + + switch (type.toLowerCase()) { + case "": + case "publication": + final Publication p = new Publication(); + populateResultFields(p, doc, collectedFrom, info, lastUpdateTimestamp); + p.setResulttype(PUBLICATION_RESULTTYPE_QUALIFIER); + p.setJournal(prepareJournal(doc, info)); + oafs.add(p); + break; + case "dataset": + final Dataset d = new Dataset(); + populateResultFields(d, doc, collectedFrom, info, lastUpdateTimestamp); + d.setResulttype(DATASET_RESULTTYPE_QUALIFIER); + d.setStoragedate(prepareDatasetStorageDate(doc, info)); + d.setDevice(prepareDatasetDevice(doc, info)); + d.setSize(prepareDatasetSize(doc, info)); + d.setVersion(prepareDatasetVersion(doc, info)); + d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); + d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); + d.setGeolocation(prepareDatasetGeoLocations(doc, info)); + oafs.add(d); + break; + case "software": + final Software s = new Software(); + populateResultFields(s, doc, collectedFrom, info, lastUpdateTimestamp); + s.setResulttype(SOFTWARE_RESULTTYPE_QUALIFIER); + s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); + s.setLicense(prepareSoftwareLicenses(doc, info)); + s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); + s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); + oafs.add(s); + break; + case "otherresearchproducts": + default: + final OtherResearchProduct o = new OtherResearchProduct(); + populateResultFields(o, doc, collectedFrom, info, lastUpdateTimestamp); + o.setResulttype(OTHER_RESULTTYPE_QUALIFIER); + o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); + o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); + o.setTool(prepareOtherResearchProductTools(doc, info)); + oafs.add(o); + break; + } + + if (!oafs.isEmpty()) { + addRelations(oafs, doc, "TYPE", collectedFrom, info, lastUpdateTimestamp); // TODO + addRelations(oafs, doc, "TYPE", collectedFrom, info, lastUpdateTimestamp); // TODO + addRelations(oafs, doc, "TYPE", collectedFrom, info, lastUpdateTimestamp); // TODO + } + + return oafs; + } + + private void populateResultFields(final Result r, final Document doc, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) { + r.setDataInfo(info); + r.setLastupdatetimestamp(lastUpdateTimestamp); + r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"))); + r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); + r.setCollectedfrom(Arrays.asList(collectedFrom)); + r.setPid(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); + r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); + r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES + r.setOaiprovenance(prepareOAIprovenance(doc)); + r.setAuthor(prepareAuthors(doc, info)); + r.setLanguage(prepareLanguages(doc)); + r.setCountry(new ArrayList<>()); // NOT PRESENT IN MDSTORES + r.setSubject(prepareSubjects(doc, info)); + r.setTitle(prepareTitles(doc, info)); + r.setRelevantdate(prepareRelevantDates(doc, info)); + r.setDescription(prepareDescriptions(doc, info)); + r.setDateofacceptance(prepareField(doc, "//oaf:dateAccepted", info)); + r.setPublisher(preparePublisher(doc, info)); + r.setEmbargoenddate(prepareEmbargoEndDate(doc, info)); + r.setSource(prepareSources(doc, info)); + r.setFulltext(null); // NOT PRESENT IN MDSTORES + r.setFormat(prepareFormats(doc, info)); + r.setContributor(prepareContributors(doc, info)); + r.setResourcetype(null); // TODO + r.setCoverage(prepareCoverages(doc, info)); + r.setRefereed(null); // TODO + r.setContext(null); // TODO + r.setExternalReference(null); // TODO + r.setInstance(prepareInstances(doc, info)); + r.setProcessingchargeamount(null); // TODO + r.setProcessingchargecurrency(null); // TODO + } + + protected abstract List prepareInstances(Document doc, DataInfo info); + + protected abstract List> prepareSources(Document doc, DataInfo info); + + protected abstract Field prepareEmbargoEndDate(Document doc, DataInfo info); + + protected abstract List prepareRelevantDates(Document doc, DataInfo info); + + protected abstract List> prepareCoverages(Document doc, DataInfo info); + + protected abstract List> prepareContributors(Document doc, DataInfo info); + + protected abstract List> prepareFormats(Document doc, DataInfo info); + + protected abstract Field preparePublisher(Document doc, DataInfo info); + + protected abstract List> prepareDescriptions(Document doc, DataInfo info); + + protected abstract List prepareTitles(Document doc, DataInfo info); + + protected abstract List prepareSubjects(Document doc, DataInfo info); + + protected abstract Qualifier prepareLanguages(Document doc); + + protected abstract List prepareAuthors(Document doc, DataInfo info); + + protected abstract List> prepareOtherResearchProductTools(Document doc, DataInfo info); + + protected abstract List> prepareOtherResearchProductContactGroups(Document doc, DataInfo info); + + protected abstract List> prepareOtherResearchProductContactPersons(Document doc, DataInfo info); + + protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info); + + protected abstract Field prepareSoftwareCodeRepositoryUrl(Document doc, DataInfo info); + + protected abstract List prepareSoftwareLicenses(Document doc, DataInfo info); + + protected abstract List> prepareSoftwareDocumentationUrls(Document doc, DataInfo info); + + protected abstract List prepareDatasetGeoLocations(Document doc, DataInfo info); + + protected abstract Field prepareDatasetMetadataVersionNumber(Document doc, DataInfo info); + + protected abstract Field prepareDatasetLastMetadataUpdate(Document doc, DataInfo info); + + protected abstract Field prepareDatasetVersion(Document doc, DataInfo info); + + protected abstract Field prepareDatasetSize(Document doc, DataInfo info); + + protected abstract Field prepareDatasetDevice(Document doc, DataInfo info); + + protected abstract Field prepareDatasetStorageDate(Document doc, DataInfo info); + + abstract protected void addRelations(final List oafs, + final Document doc, + final String type, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp); + + private Journal prepareJournal(final Document doc, final DataInfo info) { + final Node n = doc.selectSingleNode("//oaf:journal"); + if (n != null) { + final String name = n.getText(); + final String issnPrinted = n.valueOf("@issn"); + final String issnOnline = n.valueOf("@eissn"); + final String issnLinking = n.valueOf("@lissn"); + if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, null, null, null, null, null, null, null, info); } + } + return null; + } + + protected Qualifier prepareQualifier(final Document doc, final String xpath, final String schemeId, final String schemeName) { + final String classId = doc.valueOf(xpath); + final String className = code2name.get(classId); + return qualifier(classId, className, schemeId, schemeName); + } + + protected List prepareListStructProps(final Document doc, + final String xpath, + final String xpathClassId, + final String schemeId, + final String schemeName, + final DataInfo info) { + final List res = new ArrayList<>(); + for (final Object o : doc.selectNodes(xpath)) { + final Node n = (Node) o; + final String classId = n.valueOf(xpathClassId); + final String className = code2name.get(classId); + res.add(structuredProperty(n.getText(), classId, className, schemeId, schemeName, info)); + } + return res; + } + + protected List prepareListStructProps(final Document doc, final String xpath, final Qualifier qualifier, final DataInfo info) { + final List res = new ArrayList<>(); + for (final Object o : doc.selectNodes(xpath)) { + final Node n = (Node) o; + res.add(structuredProperty(n.getText(), qualifier, info)); + } + return res; + } + + protected List prepareListStructProps(final Document doc, final String xpath, final DataInfo info) { + final List res = new ArrayList<>(); + for (final Object o : doc.selectNodes(xpath)) { + final Node n = (Node) o; + res.add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n + .valueOf("@schemename"), info)); + } + return res; + } + + protected OAIProvenance prepareOAIprovenance(final Document doc) { + final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); + + final String identifier = n.valueOf("./*[local-name()='identifier']"); + final String baseURL = n.valueOf("./*[local-name()='baseURL']");; + final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");; + final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true"); + final String datestamp = n.valueOf("./*[local-name()='datestamp']");; + final String harvestDate = n.valueOf("@harvestDate");; + + return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate); + } + + protected DataInfo prepareDataInfo(final Document doc) { + final Node n = doc.selectSingleNode("//oaf:datainfo"); + + final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); + final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); + final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid"); + final String paSchemeName = n.valueOf("./oaf:provenanceaction/@schemename"); + + final boolean deletedbyinference = Boolean.parseBoolean(n.valueOf("./oaf:deletedbyinference")); + final String inferenceprovenance = n.valueOf("./oaf:inferenceprovenance"); + final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); + final String trust = n.valueOf("./oaf:trust"); + + return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); + } + + protected Field prepareField(final Document doc, final String xpath, final DataInfo info) { + return field(doc.valueOf(xpath), info); + } + + protected List> prepareListFields(final Document doc, final String xpath, final DataInfo info) { + return listFields(info, (String[]) prepareListString(doc, xpath).toArray()); + } + + protected List prepareListString(final Document doc, final String xpath) { + final List res = new ArrayList<>(); + for (final Object o : doc.selectNodes(xpath)) { + final String s = ((Node) o).getText().trim(); + if (StringUtils.isNotBlank(s)) { + res.add(s); + } + } + return res; + } + + @Override + public void close() throws IOException { + super.close(); + mdstoreClient.close(); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateDbEntitiesApplication.java index 0b47c5282..12043709f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateDbEntitiesApplication.java @@ -28,7 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -public class MigrateDbEntitiesApplication extends AbstractMigrateApplication implements Closeable { +public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor implements Closeable { private static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION = qualifier("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry", "dnet:provenance_actions", "dnet:provenance_actions"); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateMongoMdstoresApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateMongoMdstoresApplication.java index f6dcaf0e8..124a4f3cc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateMongoMdstoresApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrateMongoMdstoresApplication.java @@ -1,56 +1,10 @@ package eu.dnetlib.dhp.migration; -import java.io.Closeable; -import java.io.IOException; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.DocumentFactory; -import org.dom4j.DocumentHelper; -import org.dom4j.Node; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.OAIProvenance; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -public class MigrateMongoMdstoresApplication extends AbstractMigrateApplication implements Closeable { - - private static final Log log = LogFactory.getLog(MigrateMongoMdstoresApplication.class); - - private final Map code2name = new HashMap<>(); - - private final MdstoreClient mdstoreClient; - - private static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); - - private static final Qualifier PUBLICATION_RESULTTYPE_QUALIFIER = - qualifier("publication", "publication", "dnet:result_typologies", "dnet:result_typologies"); - private static final Qualifier DATASET_RESULTTYPE_QUALIFIER = qualifier("dataset", "dataset", "dnet:result_typologies", "dnet:result_typologies"); - private static final Qualifier SOFTWARE_RESULTTYPE_QUALIFIER = qualifier("software", "software", "dnet:result_typologies", "dnet:result_typologies"); - private static final Qualifier OTHER_RESULTTYPE_QUALIFIER = qualifier("other", "other", "dnet:result_typologies", "dnet:result_typologies"); +public class MigrateMongoMdstoresApplication { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -72,294 +26,17 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrateApplication final String dbUser = parser.get("postgresUser"); final String dbPassword = parser.get("postgresPassword"); - try (final MigrateMongoMdstoresApplication mig = - new MigrateMongoMdstoresApplication(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword)) { - mig.processMdRecords(mdFormat, mdLayout, mdInterpretation); - } - - } - - public MigrateMongoMdstoresApplication(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl, - final String mongoDb, final String dbUrl, final String dbUser, - final String dbPassword) throws Exception { - super(hdfsPath, hdfsNameNode, hdfsUser); - - this.mdstoreClient = new MdstoreClient(mongoBaseUrl, mongoDb); - loadClassNames(dbUrl, dbUser, dbPassword); - - final Map nsContext = new HashMap<>(); - nsContext.put("dc", "http://purl.org/dc/elements/1.1/"); - nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr"); - nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri"); - nsContext.put("oaf", "http://namespace.openaire.eu/oaf"); - nsContext.put("oai", "http://www.openarchives.org/OAI/2.0/"); - nsContext.put("prov", "http://www.openarchives.org/OAI/2.0/provenance"); - DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); - } - - private void loadClassNames(final String dbUrl, final String dbUser, final String dbPassword) throws IOException { - try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) { - code2name.clear(); - dbClient.processResults("select code, name from class", rs -> { - try { - code2name.put(rs.getString("code"), rs.getString("name")); - } catch (final SQLException e) { - e.printStackTrace(); - } - }); - } - - } - - public void processMdRecords(final String mdFormat, final String mdLayout, final String mdInterpretation) throws DocumentException { - - for (final Entry entry : mdstoreClient.validCollections(mdFormat, mdLayout, mdInterpretation).entrySet()) { - // final String mdId = entry.getKey(); - final String currentColl = entry.getValue(); - - for (final String xml : mdstoreClient.listRecords(currentColl)) { - for (final Oaf oaf : createOafs(xml)) { - emitOaf(oaf); - } + if (mdFormat.equalsIgnoreCase("oaf")) { + try (final OafMigrationExecutor mig = + new OafMigrationExecutor(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword)) { + mig.processMdRecords(mdFormat, mdLayout, mdInterpretation); } - } - } + } else if (mdFormat.equalsIgnoreCase("oaf")) { - private List createOafs(final String xml) throws DocumentException { - - final Document doc = DocumentHelper.parseText(xml); - - final String type = doc.valueOf("//dr:CobjCategory/@type"); - final KeyValue collectedFrom = keyValue(doc.valueOf("//oaf:collectedFrom/@id"), doc.valueOf("//oaf:collectedFrom/@name")); - final DataInfo info = prepareDataInfo(doc); - final long lastUpdateTimestamp = new Date().getTime(); - - final List oafs = new ArrayList<>(); - - switch (type.toLowerCase()) { - case "": - case "publication": - final Publication p = new Publication(); - populateResultFields(p, doc, collectedFrom, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_RESULTTYPE_QUALIFIER); - p.setJournal(null); // TODO - oafs.add(p); - break; - case "dataset": - final Dataset d = new Dataset(); - populateResultFields(d, doc, collectedFrom, info, lastUpdateTimestamp); - d.setResulttype(DATASET_RESULTTYPE_QUALIFIER); - d.setStoragedate(null); // TODO - d.setDevice(null); // TODO - d.setSize(null); // TODO - d.setVersion(null); // TODO - d.setLastmetadataupdate(null); // TODO - d.setMetadataversionnumber(null); // TODO - d.setGeolocation(null); // TODO - oafs.add(d); - break; - case "otherresearchproducts": - - case "software": - final Software s = new Software(); - populateResultFields(s, doc, collectedFrom, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_RESULTTYPE_QUALIFIER); - s.setDocumentationUrl(null); // TODO - s.setLicense(null); // TODO - s.setCodeRepositoryUrl(null); // TODO - s.setProgrammingLanguage(null); // TODO - oafs.add(s); - break; - default: - final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, collectedFrom, info, lastUpdateTimestamp); - o.setResulttype(OTHER_RESULTTYPE_QUALIFIER); - o.setContactperson(null); // TODO - o.setContactgroup(null); // TODO - o.setTool(null); // TODO - oafs.add(o); - break; + } else { + throw new RuntimeException("Format not supported: " + mdFormat); } - if (!oafs.isEmpty()) { - addRelations(oafs, doc, "//*", "TYPE", collectedFrom, info, lastUpdateTimestamp); // TODO - addRelations(oafs, doc, "//*", "TYPE", collectedFrom, info, lastUpdateTimestamp); // TODO - addRelations(oafs, doc, "//*", "TYPE", collectedFrom, info, lastUpdateTimestamp); // TODO - } - - return oafs; - } - - private void addRelations(final List oafs, - final Document doc, - final String xpath, - final String type, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp) { - for (final Object o : doc.selectNodes(xpath)) { - final Node n = (Node) o; - final Relation r = new Relation(); - r.setRelType(null); // TODO - r.setSubRelType(null); // TODO - r.setRelClass(null); // TODO - r.setSource(null); // TODO - r.setTarget(null); // TODO - r.setCollectedFrom(Arrays.asList(collectedFrom)); - r.setDataInfo(info); - r.setLastupdatetimestamp(lastUpdateTimestamp); - oafs.add(r); - } - - } - - private void populateResultFields(final Result r, final Document doc, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) { - - r.setDataInfo(info); - r.setLastupdatetimestamp(lastUpdateTimestamp); - r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"))); - r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); - r.setCollectedfrom(Arrays.asList(collectedFrom)); - r.setPid(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); - r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); - r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); - r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES - r.setOaiprovenance(prepareOAIprovenance(doc)); - r.setAuthor(null); // TODO - r.setLanguage(prepareQualifier(doc, "//dc:language", "dnet:languages", "dnet:languages")); - r.setCountry(new ArrayList<>()); // NOT PRESENT IN MDSTORES - r.setSubject(prepareListStructProps(doc, "//dc:subject", info)); - r.setTitle(prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER, info)); - r.setRelevantdate(null); // TODO - r.setDescription(prepareListFields(doc, "//dc:description", info)); - r.setDateofacceptance(prepareField(doc, "//oaf:dateAccepted", info)); - r.setPublisher(prepareField(doc, "//dc:publisher", info)); - r.setEmbargoenddate(null); // TODO - r.setSource(null); // TODO - r.setFulltext(null); // TODO - r.setFormat(prepareListFields(doc, "//dc:format", info)); - r.setContributor(prepareListFields(doc, "//dc:contributor", info)); - r.setResourcetype(null); // TODO - r.setCoverage(prepareListFields(doc, "//dc:coverage", info)); - r.setRefereed(null); // TODO - r.setContext(null); // TODO - r.setExternalReference(null); // TODO - r.setInstance(null); // TODO - r.setProcessingchargeamount(null); // TODO - r.setProcessingchargecurrency(null); // TODO - } - - private Qualifier prepareQualifier(final Document doc, final String xpath, final String schemeId, final String schemeName) { - final String classId = doc.valueOf(xpath); - final String className = code2name.get(classId); - return qualifier(classId, className, schemeId, schemeName); - } - - private List prepareListStructProps(final Document doc, - final String xpath, - final String xpathClassId, - final String schemeId, - final String schemeName, - final DataInfo info) { - final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes(xpath)) { - final Node n = (Node) o; - final String classId = n.valueOf(xpathClassId); - final String className = code2name.get(classId); - res.add(structuredProperty(n.getText(), classId, className, schemeId, schemeName, info)); - } - return res; - } - - private List prepareListStructProps(final Document doc, final String xpath, final Qualifier qualifier, final DataInfo info) { - final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes(xpath)) { - final Node n = (Node) o; - res.add(structuredProperty(n.getText(), qualifier, info)); - } - return res; - } - - private List prepareListStructProps(final Document doc, final String xpath, final DataInfo info) { - final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes(xpath)) { - final Node n = (Node) o; - res.add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n - .valueOf("@schemename"), info)); - } - return res; - } - - private OAIProvenance prepareOAIprovenance(final Document doc) { - final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); - - final String identifier = n.valueOf("./*[local-name()='identifier']"); - final String baseURL = n.valueOf("./*[local-name()='baseURL']");; - final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");; - final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true"); - final String datestamp = n.valueOf("./*[local-name()='datestamp']");; - final String harvestDate = n.valueOf("@harvestDate");; - - return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate); - } - - private DataInfo prepareDataInfo(final Document doc) { - final Node n = doc.selectSingleNode("//oaf:datainfo"); - - final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); - final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); - final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid"); - final String paSchemeName = n.valueOf("./oaf:provenanceaction/@schemename"); - - final boolean deletedbyinference = Boolean.parseBoolean(n.valueOf("./oaf:deletedbyinference")); - final String inferenceprovenance = n.valueOf("./oaf:inferenceprovenance"); - final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); - final String trust = n.valueOf("./oaf:trust"); - - return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); - } - - private Field prepareField(final Document doc, final String xpath, final DataInfo info) { - return field(doc.valueOf(xpath), info); - } - - private List> prepareListFields(final Document doc, final String xpath, final DataInfo info) { - return listFields(info, (String[]) prepareListString(doc, xpath).toArray()); - } - - private List prepareListString(final Document doc, final String xpath) { - final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes(xpath)) { - final String s = ((Node) o).getText().trim(); - if (StringUtils.isNotBlank(s)) { - res.add(s); - } - } - return res; - } - /* - * private StructuredProperty prepareStructProp(final Document doc, final String xpath, final DataInfo dataInfo) { if - * (StringUtils.isBlank(s)) { return null; } final String[] parts = s.split("###"); if (parts.length == 2) { final String value = - * parts[0]; final String[] arr = parts[1].split("@@@"); if (arr.length == 4) { return structuredProperty(value, arr[0], arr[1], arr[2], - * arr[3], dataInfo); } } return null; } - * - * private List prepareListOfStructProps(final Document doc, final String xpath, final DataInfo dataInfo) { final - * List res = new ArrayList<>(); if (array != null) { for (final String s : (String[]) array.getArray()) { final - * StructuredProperty sp = prepareStructProp(s, dataInfo); if (sp != null) { res.add(sp); } } } - * - * return res; } - * - * private Journal prepareJournal(final Document doc, final String xpath, final DataInfo info) { if (StringUtils.isNotBlank(sj)) { final - * String[] arr = sj.split("@@@"); if (arr.length == 3) { final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null; final - * String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;; final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;; - * if (issn != null || eissn != null || lissn != null) { return journal(name, issn, eissn, eissn, null, null, null, null, null, null, - * null, info); } } } return null; } - */ - - @Override - public void close() throws IOException { - super.close(); - mdstoreClient.close(); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrationUtils.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrationUtils.java deleted file mode 100644 index c58688a79..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/MigrationUtils.java +++ /dev/null @@ -1,154 +0,0 @@ -package eu.dnetlib.dhp.migration; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -import org.apache.commons.lang3.StringUtils; - -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.ExtraInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.OAIProvenance; -import eu.dnetlib.dhp.schema.oaf.OriginDescription; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.utils.DHPUtils; - -public class MigrationUtils { - - public static KeyValue keyValue(final String k, final String v) { - final KeyValue kv = new KeyValue(); - kv.setKey(k); - kv.setValue(v); - return kv; - } - - public static List listKeyValues(final String... s) { - if (s.length % 2 > 0) { throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); } - - final List list = new ArrayList<>(); - for (int i = 0; i < s.length; i += 2) { - list.add(keyValue(s[i], s[i + 1])); - } - return list; - } - - public static Field field(final T value, final DataInfo info) { - final Field field = new Field<>(); - field.setValue(value); - field.setDataInfo(info); - return field; - } - - public static List> listFields(final DataInfo info, final String... values) { - return Arrays.stream(values).map(v -> field(v, info)).collect(Collectors.toList()); - } - - public static Qualifier qualifier(final String classid, final String classname, final String schemeid, final String schemename) { - final Qualifier q = new Qualifier(); - q.setClassid(classid); - q.setClassname(classname); - q.setSchemeid(schemeid); - q.setSchemename(schemename); - return q; - } - - public static StructuredProperty structuredProperty(final String value, - final String classid, - final String classname, - final String schemeid, - final String schemename, - final DataInfo dataInfo) { - final StructuredProperty sp = new StructuredProperty(); - sp.setValue(value); - sp.setQualifier(qualifier(classid, classname, schemeid, schemename)); - sp.setDataInfo(dataInfo); - return sp; - } - - public static ExtraInfo extraInfo(final String name, final String value, final String typology, final String provenance, final String trust) { - final ExtraInfo info = new ExtraInfo(); - info.setName(name); - info.setValue(value); - info.setTypology(typology); - info.setProvenance(provenance); - info.setTrust(trust); - return info; - } - - public static OAIProvenance oaiIProvenance(final String identifier, - final String baseURL, - final String metadataNamespace, - final Boolean altered, - final String datestamp, - final String harvestDate) { - - final OriginDescription desc = new OriginDescription(); - desc.setIdentifier(identifier); - desc.setBaseURL(baseURL); - desc.setMetadataNamespace(metadataNamespace); - desc.setAltered(altered); - desc.setDatestamp(datestamp); - desc.setHarvestDate(harvestDate); - - final OAIProvenance p = new OAIProvenance(); - p.setOriginDescription(desc); - - return p; - } - - public static Journal journal(final String name, - final String issnPrinted, - final String issnOnline, - final String issnLinking, - final String ep, - final String iss, - final String sp, - final String vol, - final String edition, - final String conferenceplace, - final String conferencedate, - final DataInfo dataInfo) { - final Journal j = new Journal(); - j.setName(name); - j.setIssnPrinted(issnPrinted); - j.setIssnOnline(issnOnline); - j.setIssnLinking(issnLinking); - j.setEp(ep); - j.setIss(iss); - j.setSp(sp); - j.setVol(vol); - j.setEdition(edition); - j.setConferenceplace(conferenceplace); - j.setConferencedate(conferencedate); - j.setDataInfo(dataInfo); - return j; - } - - public static DataInfo dataInfo(final Boolean deletedbyinference, - final String inferenceprovenance, - final Boolean inferred, - final Boolean invisible, - final Qualifier provenanceaction, - final String trust) { - final DataInfo d = new DataInfo(); - d.setDeletedbyinference(deletedbyinference); - d.setInferenceprovenance(inferenceprovenance); - d.setInferred(inferred); - d.setInvisible(invisible); - d.setProvenanceaction(provenanceaction); - d.setTrust(trust); - return d; - } - - public static String createOpenaireId(final String prefix, final String originalId) { - final String nsPrefix = StringUtils.substringBefore(originalId, "::"); - final String rest = StringUtils.substringAfter(originalId, "::"); - return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest)); - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OafMigrationExecutor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OafMigrationExecutor.java new file mode 100644 index 000000000..4d222f360 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OafMigrationExecutor.java @@ -0,0 +1,246 @@ +package eu.dnetlib.dhp.migration; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dom4j.Document; +import org.dom4j.Node; + +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.GeoLocation; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class OafMigrationExecutor extends AbstractMongoExecutor { + + public OafMigrationExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl, final String mongoDb, + final String dbUrl, final String dbUser, + final String dbPassword) throws Exception { + super(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword); + } + + private static final Log log = LogFactory.getLog(MigrateMongoMdstoresApplication.class); + + @Override + protected void registerNamespaces(final Map nsContext) { + nsContext.put("dc", "http://purl.org/dc/elements/1.1/"); + nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr"); + nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri"); + nsContext.put("oaf", "http://namespace.openaire.eu/oaf"); + nsContext.put("oai", "http://www.openarchives.org/OAI/2.0/"); + nsContext.put("prov", "http://www.openarchives.org/OAI/2.0/provenance"); + } + + @Override + protected void addRelations(final List oafs, + final Document doc, + final String type, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp) { + for (final Object o : doc.selectNodes("//")) { // TODO + final Node n = (Node) o; + final Relation r = new Relation(); + r.setRelType(null); // TODO + r.setSubRelType(null); // TODO + r.setRelClass(null); // TODO + r.setSource(null); // TODO + r.setTarget(null); // TODO + r.setCollectedFrom(Arrays.asList(collectedFrom)); + r.setDataInfo(info); + r.setLastupdatetimestamp(lastUpdateTimestamp); + oafs.add(r); + } + + } + + @Override + protected List prepareAuthors(final Document doc, final DataInfo info) { + final List res = new ArrayList<>(); + int pos = 1; + for (final Object o : doc.selectNodes("//dc:creator")) { + final Node n = (Node) o; + final Author author = new Author(); + author.setFullname(n.getText()); + author.setRank(pos++); + } + return res; + } + + @Override + protected Qualifier prepareLanguages(final Document doc) { + return prepareQualifier(doc, "//dc:language", "dnet:languages", "dnet:languages"); + } + + @Override + protected List prepareSubjects(final Document doc, final DataInfo info) { + return prepareListStructProps(doc, "//dc:subject", info); + } + + @Override + protected List prepareTitles(final Document doc, final DataInfo info) { + return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER, info); + } + + @Override + protected List> prepareDescriptions(final Document doc, final DataInfo info) { + return prepareListFields(doc, "//dc:description", info); + } + + @Override + protected Field preparePublisher(final Document doc, final DataInfo info) { + return prepareField(doc, "//dc:publisher", info); + } + + @Override + protected List> prepareFormats(final Document doc, final DataInfo info) { + return prepareListFields(doc, "//dc:format", info); + } + + @Override + protected List> prepareContributors(final Document doc, final DataInfo info) { + return prepareListFields(doc, "//dc:contributor", info); + } + + @Override + protected List> prepareCoverages(final Document doc, final DataInfo info) { + return prepareListFields(doc, "//dc:coverage", info); + } + + @Override + protected List prepareInstances(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareSources(final Document doc, final DataInfo info) { + return prepareListFields(doc, "//dc:source", info); + } + + @Override + protected Field prepareEmbargoEndDate(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List prepareRelevantDates(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareOtherResearchProductTools(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareOtherResearchProductContactGroups(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareOtherResearchProductContactPersons(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareSoftwareCodeRepositoryUrl(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List prepareSoftwareLicenses(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareSoftwareDocumentationUrls(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List prepareDatasetGeoLocations(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetMetadataVersionNumber(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetLastMetadataUpdate(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetVersion(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetSize(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetDevice(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetStorageDate(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + /* + * private StructuredProperty prepareStructProp(final Document doc, final String xpath, final DataInfo dataInfo) { if + * (StringUtils.isBlank(s)) { return null; } final String[] parts = s.split("###"); if (parts.length == 2) { final String value = + * parts[0]; final String[] arr = parts[1].split("@@@"); if (arr.length == 4) { return structuredProperty(value, arr[0], arr[1], arr[2], + * arr[3], dataInfo); } } return null; } + * + * private List prepareListOfStructProps(final Document doc, final String xpath, final DataInfo dataInfo) { final + * List res = new ArrayList<>(); if (array != null) { for (final String s : (String[]) array.getArray()) { final + * StructuredProperty sp = prepareStructProp(s, dataInfo); if (sp != null) { res.add(sp); } } } + * + * return res; } + * + * private Journal prepareJournal(final Document doc, final String xpath, final DataInfo info) { if (StringUtils.isNotBlank(sj)) { final + * String[] arr = sj.split("@@@"); if (arr.length == 3) { final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null; final + * String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;; final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;; + * if (issn != null || eissn != null || lissn != null) { return journal(name, issn, eissn, eissn, null, null, null, null, null, null, + * null, info); } } } return null; } + */ + +}