From bb1533a07e0dbaac9e71bbcf29f523e69e9dcdc2 Mon Sep 17 00:00:00 2001 From: Michele Artini Date: Wed, 5 Feb 2020 15:35:40 +0100 Subject: [PATCH] partial commit --- .../dhp/migration/AbstractMongoExecutor.java | 38 +++- .../dhp/migration/OafMigrationExecutor.java | 82 +++---- .../dhp/migration/OdfMigrationExecutor.java | 209 ++++++++++++++++++ 3 files changed, 279 insertions(+), 50 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OdfMigrationExecutor.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMongoExecutor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMongoExecutor.java index 51c39824a..cf1581b4d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMongoExecutor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/AbstractMongoExecutor.java @@ -94,10 +94,13 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor { final String type = doc.valueOf("//dr:CobjCategory/@type"); final KeyValue collectedFrom = keyValue(doc.valueOf("//oaf:collectedFrom/@id"), doc.valueOf("//oaf:collectedFrom/@name")); + final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom + : keyValue(doc.valueOf("//oaf:hostedBy/@id"), doc.valueOf("//oaf:hostedBy/@name")); + final DataInfo info = prepareDataInfo(doc); final long lastUpdateTimestamp = new Date().getTime(); - for (final Oaf oaf : createOafs(doc, type, collectedFrom, info, lastUpdateTimestamp)) { + for (final Oaf oaf : createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp)) { emitOaf(oaf); } } @@ -106,7 +109,12 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor { protected abstract void registerNamespaces(Map nsContext); - protected List createOafs(final Document doc, final String type, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) { + protected List createOafs(final Document doc, + final String type, + final KeyValue collectedFrom, + final KeyValue hostedBy, + final DataInfo info, + final long lastUpdateTimestamp) { final List oafs = new ArrayList<>(); @@ -114,14 +122,14 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor { case "": case "publication": final Publication p = new Publication(); - populateResultFields(p, doc, collectedFrom, info, lastUpdateTimestamp); + populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); p.setResulttype(PUBLICATION_RESULTTYPE_QUALIFIER); p.setJournal(prepareJournal(doc, info)); oafs.add(p); break; case "dataset": final Dataset d = new Dataset(); - populateResultFields(d, doc, collectedFrom, info, lastUpdateTimestamp); + populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); d.setResulttype(DATASET_RESULTTYPE_QUALIFIER); d.setStoragedate(prepareDatasetStorageDate(doc, info)); d.setDevice(prepareDatasetDevice(doc, info)); @@ -134,7 +142,7 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor { break; case "software": final Software s = new Software(); - populateResultFields(s, doc, collectedFrom, info, lastUpdateTimestamp); + populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); s.setResulttype(SOFTWARE_RESULTTYPE_QUALIFIER); s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); s.setLicense(prepareSoftwareLicenses(doc, info)); @@ -145,7 +153,7 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor { case "otherresearchproducts": default: final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, collectedFrom, info, lastUpdateTimestamp); + populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); o.setResulttype(OTHER_RESULTTYPE_QUALIFIER); o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); @@ -163,7 +171,12 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor { return oafs; } - private void populateResultFields(final Result r, final Document doc, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) { + private void populateResultFields(final Result r, + final Document doc, + final KeyValue collectedFrom, + final KeyValue hostedBy, + final DataInfo info, + final long lastUpdateTimestamp) { r.setDataInfo(info); r.setLastupdatetimestamp(lastUpdateTimestamp); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"))); @@ -193,12 +206,12 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor { r.setRefereed(null); // TODO r.setContext(null); // TODO r.setExternalReference(null); // TODO - r.setInstance(prepareInstances(doc, info)); + r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy)); r.setProcessingchargeamount(null); // TODO r.setProcessingchargecurrency(null); // TODO } - protected abstract List prepareInstances(Document doc, DataInfo info); + protected abstract List prepareInstances(Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby); protected abstract List> prepareSources(Document doc, DataInfo info); @@ -266,7 +279,12 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor { final String issnPrinted = n.valueOf("@issn"); final String issnOnline = n.valueOf("@eissn"); final String issnLinking = n.valueOf("@lissn"); - if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, null, null, null, null, null, null, null, info); } + final String ep = n.valueOf("@ep"); + final String iss = n.valueOf("@iss"); + final String sp = n.valueOf("@sp"); + final String vol = n.valueOf("@vol"); + final String edition = n.valueOf("@edition"); + if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } } return null; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OafMigrationExecutor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OafMigrationExecutor.java index 4d222f360..f46b31732 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OafMigrationExecutor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OafMigrationExecutor.java @@ -23,14 +23,14 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OafMigrationExecutor extends AbstractMongoExecutor { + private static final Log log = LogFactory.getLog(OafMigrationExecutor.class); + public OafMigrationExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl, final String mongoDb, final String dbUrl, final String dbUser, final String dbPassword) throws Exception { super(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword); } - private static final Log log = LogFactory.getLog(MigrateMongoMdstoresApplication.class); - @Override protected void registerNamespaces(final Map nsContext) { nsContext.put("dc", "http://purl.org/dc/elements/1.1/"); @@ -73,6 +73,7 @@ public class OafMigrationExecutor extends AbstractMongoExecutor { final Author author = new Author(); author.setFullname(n.getText()); author.setRank(pos++); + res.add(author); } return res; } @@ -118,9 +119,24 @@ public class OafMigrationExecutor extends AbstractMongoExecutor { } @Override - protected List prepareInstances(final Document doc, final DataInfo info) { - // TODO Auto-generated method stub - return null; + protected List prepareInstances(final Document doc, final DataInfo info, final KeyValue collectedfrom, final KeyValue hostedby) { + final List res = new ArrayList<>(); + for (final Object o : doc.selectNodes("//dc:identifier")) { + final String url = ((Node) o).getText().trim(); + if (url.startsWith("http")) { + final Instance instance = new Instance(); + instance.setUrl(url); + instance.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", "dnet:publication_resource", "dnet:publication_resource")); + instance.setCollectedfrom(collectedfrom); + instance.setHostedby(hostedby); + instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); + instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); + instance.setAccessright(prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes")); + instance.setLicense(field(doc.valueOf("//oaf:license"), info)); + res.add(instance); + } + } + return res; } @Override @@ -140,23 +156,7 @@ public class OafMigrationExecutor extends AbstractMongoExecutor { return null; } - @Override - protected List> prepareOtherResearchProductTools(final Document doc, final DataInfo info) { - // TODO Auto-generated method stub - return null; - } - - @Override - protected List> prepareOtherResearchProductContactGroups(final Document doc, final DataInfo info) { - // TODO Auto-generated method stub - return null; - } - - @Override - protected List> prepareOtherResearchProductContactPersons(final Document doc, final DataInfo info) { - // TODO Auto-generated method stub - return null; - } + // SOFTWARES @Override protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { @@ -182,6 +182,7 @@ public class OafMigrationExecutor extends AbstractMongoExecutor { return null; } + // DATASETS @Override protected List prepareDatasetGeoLocations(final Document doc, final DataInfo info) { // TODO Auto-generated method stub @@ -224,23 +225,24 @@ public class OafMigrationExecutor extends AbstractMongoExecutor { return null; } - /* - * private StructuredProperty prepareStructProp(final Document doc, final String xpath, final DataInfo dataInfo) { if - * (StringUtils.isBlank(s)) { return null; } final String[] parts = s.split("###"); if (parts.length == 2) { final String value = - * parts[0]; final String[] arr = parts[1].split("@@@"); if (arr.length == 4) { return structuredProperty(value, arr[0], arr[1], arr[2], - * arr[3], dataInfo); } } return null; } - * - * private List prepareListOfStructProps(final Document doc, final String xpath, final DataInfo dataInfo) { final - * List res = new ArrayList<>(); if (array != null) { for (final String s : (String[]) array.getArray()) { final - * StructuredProperty sp = prepareStructProp(s, dataInfo); if (sp != null) { res.add(sp); } } } - * - * return res; } - * - * private Journal prepareJournal(final Document doc, final String xpath, final DataInfo info) { if (StringUtils.isNotBlank(sj)) { final - * String[] arr = sj.split("@@@"); if (arr.length == 3) { final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null; final - * String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;; final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;; - * if (issn != null || eissn != null || lissn != null) { return journal(name, issn, eissn, eissn, null, null, null, null, null, null, - * null, info); } } } return null; } - */ + // OTHER PRODUCTS + + @Override + protected List> prepareOtherResearchProductTools(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareOtherResearchProductContactGroups(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareOtherResearchProductContactPersons(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OdfMigrationExecutor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OdfMigrationExecutor.java new file mode 100644 index 000000000..bb0932883 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/migration/OdfMigrationExecutor.java @@ -0,0 +1,209 @@ +package eu.dnetlib.dhp.migration; + +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dom4j.Document; + +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.GeoLocation; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class OdfMigrationExecutor extends AbstractMongoExecutor { + + private static final Log log = LogFactory.getLog(OdfMigrationExecutor.class); + + public OdfMigrationExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl, final String mongoDb, + final String dbUrl, final String dbUser, + final String dbPassword) throws Exception { + super(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword); + } + + @Override + protected void registerNamespaces(final Map nsContext) { + // TODO Auto-generated method stub + + } + + @Override + protected List prepareInstances(final Document doc, final DataInfo info, final KeyValue collectedfrom, final KeyValue hostedby) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareSources(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareEmbargoEndDate(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List prepareRelevantDates(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareCoverages(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareContributors(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareFormats(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field preparePublisher(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareDescriptions(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List prepareTitles(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List prepareSubjects(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Qualifier prepareLanguages(final Document doc) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List prepareAuthors(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareOtherResearchProductTools(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareOtherResearchProductContactGroups(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareOtherResearchProductContactPersons(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareSoftwareCodeRepositoryUrl(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List prepareSoftwareLicenses(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List> prepareSoftwareDocumentationUrls(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected List prepareDatasetGeoLocations(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetMetadataVersionNumber(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetLastMetadataUpdate(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetVersion(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetSize(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetDevice(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected Field prepareDatasetStorageDate(final Document doc, final DataInfo info) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected void addRelations(final List oafs, + final Document doc, + final String type, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp) { + // TODO Auto-generated method stub + + } + +}