master #2

Merged
sandro.labruzzo merged 16 commits from michele.artini/dnet-hadoop:master into master 2020-02-17 10:43:09 +01:00
3 changed files with 279 additions and 50 deletions
Showing only changes of commit bb1533a07e - Show all commits

View File

@ -94,10 +94,13 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
final String type = doc.valueOf("//dr:CobjCategory/@type"); final String type = doc.valueOf("//dr:CobjCategory/@type");
final KeyValue collectedFrom = keyValue(doc.valueOf("//oaf:collectedFrom/@id"), doc.valueOf("//oaf:collectedFrom/@name")); final KeyValue collectedFrom = keyValue(doc.valueOf("//oaf:collectedFrom/@id"), doc.valueOf("//oaf:collectedFrom/@name"));
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom
: keyValue(doc.valueOf("//oaf:hostedBy/@id"), doc.valueOf("//oaf:hostedBy/@name"));
final DataInfo info = prepareDataInfo(doc); final DataInfo info = prepareDataInfo(doc);
final long lastUpdateTimestamp = new Date().getTime(); final long lastUpdateTimestamp = new Date().getTime();
for (final Oaf oaf : createOafs(doc, type, collectedFrom, info, lastUpdateTimestamp)) { for (final Oaf oaf : createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp)) {
emitOaf(oaf); emitOaf(oaf);
} }
} }
@ -106,7 +109,12 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
protected abstract void registerNamespaces(Map<String, String> nsContext); protected abstract void registerNamespaces(Map<String, String> nsContext);
protected List<Oaf> createOafs(final Document doc, final String type, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) { protected List<Oaf> createOafs(final Document doc,
final String type,
final KeyValue collectedFrom,
final KeyValue hostedBy,
final DataInfo info,
final long lastUpdateTimestamp) {
final List<Oaf> oafs = new ArrayList<>(); final List<Oaf> oafs = new ArrayList<>();
@ -114,14 +122,14 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
case "": case "":
case "publication": case "publication":
final Publication p = new Publication(); final Publication p = new Publication();
populateResultFields(p, doc, collectedFrom, info, lastUpdateTimestamp); populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
p.setResulttype(PUBLICATION_RESULTTYPE_QUALIFIER); p.setResulttype(PUBLICATION_RESULTTYPE_QUALIFIER);
p.setJournal(prepareJournal(doc, info)); p.setJournal(prepareJournal(doc, info));
oafs.add(p); oafs.add(p);
break; break;
case "dataset": case "dataset":
final Dataset d = new Dataset(); final Dataset d = new Dataset();
populateResultFields(d, doc, collectedFrom, info, lastUpdateTimestamp); populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
d.setResulttype(DATASET_RESULTTYPE_QUALIFIER); d.setResulttype(DATASET_RESULTTYPE_QUALIFIER);
d.setStoragedate(prepareDatasetStorageDate(doc, info)); d.setStoragedate(prepareDatasetStorageDate(doc, info));
d.setDevice(prepareDatasetDevice(doc, info)); d.setDevice(prepareDatasetDevice(doc, info));
@ -134,7 +142,7 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
break; break;
case "software": case "software":
final Software s = new Software(); final Software s = new Software();
populateResultFields(s, doc, collectedFrom, info, lastUpdateTimestamp); populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
s.setResulttype(SOFTWARE_RESULTTYPE_QUALIFIER); s.setResulttype(SOFTWARE_RESULTTYPE_QUALIFIER);
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
s.setLicense(prepareSoftwareLicenses(doc, info)); s.setLicense(prepareSoftwareLicenses(doc, info));
@ -145,7 +153,7 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
case "otherresearchproducts": case "otherresearchproducts":
default: default:
final OtherResearchProduct o = new OtherResearchProduct(); final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, doc, collectedFrom, info, lastUpdateTimestamp); populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
o.setResulttype(OTHER_RESULTTYPE_QUALIFIER); o.setResulttype(OTHER_RESULTTYPE_QUALIFIER);
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
@ -163,7 +171,12 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
return oafs; return oafs;
} }
private void populateResultFields(final Result r, final Document doc, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) { private void populateResultFields(final Result r,
final Document doc,
final KeyValue collectedFrom,
final KeyValue hostedBy,
final DataInfo info,
final long lastUpdateTimestamp) {
r.setDataInfo(info); r.setDataInfo(info);
r.setLastupdatetimestamp(lastUpdateTimestamp); r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"))); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier")));
@ -193,12 +206,12 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
r.setRefereed(null); // TODO r.setRefereed(null); // TODO
r.setContext(null); // TODO r.setContext(null); // TODO
r.setExternalReference(null); // TODO r.setExternalReference(null); // TODO
r.setInstance(prepareInstances(doc, info)); r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy));
r.setProcessingchargeamount(null); // TODO r.setProcessingchargeamount(null); // TODO
r.setProcessingchargecurrency(null); // TODO r.setProcessingchargecurrency(null); // TODO
} }
protected abstract List<Instance> prepareInstances(Document doc, DataInfo info); protected abstract List<Instance> prepareInstances(Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby);
protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info); protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info);
@ -266,7 +279,12 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
final String issnPrinted = n.valueOf("@issn"); final String issnPrinted = n.valueOf("@issn");
final String issnOnline = n.valueOf("@eissn"); final String issnOnline = n.valueOf("@eissn");
final String issnLinking = n.valueOf("@lissn"); final String issnLinking = n.valueOf("@lissn");
if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, null, null, null, null, null, null, null, info); } final String ep = n.valueOf("@ep");
final String iss = n.valueOf("@iss");
final String sp = n.valueOf("@sp");
final String vol = n.valueOf("@vol");
final String edition = n.valueOf("@edition");
if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); }
} }
return null; return null;
} }

View File

@ -23,14 +23,14 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class OafMigrationExecutor extends AbstractMongoExecutor { public class OafMigrationExecutor extends AbstractMongoExecutor {
private static final Log log = LogFactory.getLog(OafMigrationExecutor.class);
public OafMigrationExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl, final String mongoDb, public OafMigrationExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl, final String mongoDb,
final String dbUrl, final String dbUser, final String dbUrl, final String dbUser,
final String dbPassword) throws Exception { final String dbPassword) throws Exception {
super(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword); super(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword);
} }
private static final Log log = LogFactory.getLog(MigrateMongoMdstoresApplication.class);
@Override @Override
protected void registerNamespaces(final Map<String, String> nsContext) { protected void registerNamespaces(final Map<String, String> nsContext) {
nsContext.put("dc", "http://purl.org/dc/elements/1.1/"); nsContext.put("dc", "http://purl.org/dc/elements/1.1/");
@ -73,6 +73,7 @@ public class OafMigrationExecutor extends AbstractMongoExecutor {
final Author author = new Author(); final Author author = new Author();
author.setFullname(n.getText()); author.setFullname(n.getText());
author.setRank(pos++); author.setRank(pos++);
res.add(author);
} }
return res; return res;
} }
@ -118,9 +119,24 @@ public class OafMigrationExecutor extends AbstractMongoExecutor {
} }
@Override @Override
protected List<Instance> prepareInstances(final Document doc, final DataInfo info) { protected List<Instance> prepareInstances(final Document doc, final DataInfo info, final KeyValue collectedfrom, final KeyValue hostedby) {
// TODO Auto-generated method stub final List<Instance> res = new ArrayList<>();
return null; for (final Object o : doc.selectNodes("//dc:identifier")) {
final String url = ((Node) o).getText().trim();
if (url.startsWith("http")) {
final Instance instance = new Instance();
instance.setUrl(url);
instance.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", "dnet:publication_resource", "dnet:publication_resource"));
instance.setCollectedfrom(collectedfrom);
instance.setHostedby(hostedby);
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
instance.setAccessright(prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes"));
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
res.add(instance);
}
}
return res;
} }
@Override @Override
@ -140,23 +156,7 @@ public class OafMigrationExecutor extends AbstractMongoExecutor {
return null; return null;
} }
@Override // SOFTWARES
protected List<Field<String>> prepareOtherResearchProductTools(final Document doc, final DataInfo info) {
// TODO Auto-generated method stub
return null;
}
@Override
protected List<Field<String>> prepareOtherResearchProductContactGroups(final Document doc, final DataInfo info) {
// TODO Auto-generated method stub
return null;
}
@Override
protected List<Field<String>> prepareOtherResearchProductContactPersons(final Document doc, final DataInfo info) {
// TODO Auto-generated method stub
return null;
}
@Override @Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
@ -182,6 +182,7 @@ public class OafMigrationExecutor extends AbstractMongoExecutor {
return null; return null;
} }
// DATASETS
@Override @Override
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) { protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
// TODO Auto-generated method stub // TODO Auto-generated method stub
@ -224,23 +225,24 @@ public class OafMigrationExecutor extends AbstractMongoExecutor {
return null; return null;
} }
/* // OTHER PRODUCTS
* private StructuredProperty prepareStructProp(final Document doc, final String xpath, final DataInfo dataInfo) { if
* (StringUtils.isBlank(s)) { return null; } final String[] parts = s.split("###"); if (parts.length == 2) { final String value = @Override
* parts[0]; final String[] arr = parts[1].split("@@@"); if (arr.length == 4) { return structuredProperty(value, arr[0], arr[1], arr[2], protected List<Field<String>> prepareOtherResearchProductTools(final Document doc, final DataInfo info) {
* arr[3], dataInfo); } } return null; } // TODO Auto-generated method stub
* return null;
* private List<StructuredProperty> prepareListOfStructProps(final Document doc, final String xpath, final DataInfo dataInfo) { final }
* List<StructuredProperty> res = new ArrayList<>(); if (array != null) { for (final String s : (String[]) array.getArray()) { final
* StructuredProperty sp = prepareStructProp(s, dataInfo); if (sp != null) { res.add(sp); } } } @Override
* protected List<Field<String>> prepareOtherResearchProductContactGroups(final Document doc, final DataInfo info) {
* return res; } // TODO Auto-generated method stub
* return null;
* private Journal prepareJournal(final Document doc, final String xpath, final DataInfo info) { if (StringUtils.isNotBlank(sj)) { final }
* String[] arr = sj.split("@@@"); if (arr.length == 3) { final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null; final
* String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;; final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;; @Override
* if (issn != null || eissn != null || lissn != null) { return journal(name, issn, eissn, eissn, null, null, null, null, null, null, protected List<Field<String>> prepareOtherResearchProductContactPersons(final Document doc, final DataInfo info) {
* null, info); } } } return null; } // TODO Auto-generated method stub
*/ return null;
}
} }

View File

@ -0,0 +1,209 @@
package eu.dnetlib.dhp.migration;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
 * Skeleton {@link AbstractMongoExecutor} subclass: every hook required by the
 * base class is overridden here, but none of them is implemented yet.
 *
 * <p>All {@code prepare*} hooks currently return {@code null}, while
 * {@link #registerNamespaces(Map)} and
 * {@link #addRelations(List, Document, String, KeyValue, DataInfo, long)}
 * are no-ops.
 *
 * <p>NOTE(review): the class name suggests this is the ODF-record counterpart of
 * {@code OafMigrationExecutor} - confirm before filling in the extraction logic.
 */
public class OdfMigrationExecutor extends AbstractMongoExecutor {

    // NOTE(review): declared but not used by any method in this class yet.
    private static final Log log = LogFactory.getLog(OdfMigrationExecutor.class);

    /**
     * Creates the executor by forwarding every argument, unchanged and in the
     * same order, to the {@link AbstractMongoExecutor} constructor.
     *
     * @throws Exception propagated from the superclass constructor
     */
    public OdfMigrationExecutor(
            final String hdfsPath,
            final String hdfsNameNode,
            final String hdfsUser,
            final String mongoBaseUrl,
            final String mongoDb,
            final String dbUrl,
            final String dbUser,
            final String dbPassword) throws Exception {
        super(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword);
    }

    /** Not implemented yet: leaves {@code nsContext} untouched. */
    @Override
    protected void registerNamespaces(final Map<String, String> nsContext) {
        // TODO: register the XML namespace prefixes needed by the XPath expressions of this executor.
    }

    // COMMON RESULT FIELDS

    /** Not implemented yet: always returns {@code null}, ignoring all arguments. */
    @Override
    protected List<Instance> prepareInstances(
            final Document doc,
            final DataInfo info,
            final KeyValue collectedfrom,
            final KeyValue hostedby) {
        // TODO: build the instances of the record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) {
        // TODO: extract the sources.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Field<String> prepareEmbargoEndDate(final Document doc, final DataInfo info) {
        // TODO: extract the embargo end date.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
        // TODO: extract the relevant dates.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) {
        // TODO: extract the coverages.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
        // TODO: extract the contributors.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
        // TODO: extract the formats.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
        // TODO: extract the publisher.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
        // TODO: extract the descriptions.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
        // TODO: extract the titles.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
        // TODO: extract the subjects.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Qualifier prepareLanguages(final Document doc) {
        // TODO: extract the language qualifier.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Author> prepareAuthors(final Document doc, final DataInfo info) {
        // TODO: extract the authors.
        return null;
    }

    // OTHER PRODUCTS

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Field<String>> prepareOtherResearchProductTools(final Document doc, final DataInfo info) {
        // TODO: extract the tools of an other-research-product record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Field<String>> prepareOtherResearchProductContactGroups(final Document doc, final DataInfo info) {
        // TODO: extract the contact groups of an other-research-product record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Field<String>> prepareOtherResearchProductContactPersons(final Document doc, final DataInfo info) {
        // TODO: extract the contact persons of an other-research-product record.
        return null;
    }

    // SOFTWARES

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
        // TODO: extract the programming language of a software record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Field<String> prepareSoftwareCodeRepositoryUrl(final Document doc, final DataInfo info) {
        // TODO: extract the code repository URL of a software record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<StructuredProperty> prepareSoftwareLicenses(final Document doc, final DataInfo info) {
        // TODO: extract the licenses of a software record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<Field<String>> prepareSoftwareDocumentationUrls(final Document doc, final DataInfo info) {
        // TODO: extract the documentation URLs of a software record.
        return null;
    }

    // DATASETS

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
        // TODO: extract the geo-locations of a dataset record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetMetadataVersionNumber(final Document doc, final DataInfo info) {
        // TODO: extract the metadata version number of a dataset record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetLastMetadataUpdate(final Document doc, final DataInfo info) {
        // TODO: extract the last metadata update of a dataset record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
        // TODO: extract the version of a dataset record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
        // TODO: extract the size of a dataset record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) {
        // TODO: extract the device of a dataset record.
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
        // TODO: extract the storage date of a dataset record.
        return null;
    }

    // RELATIONS

    /** Not implemented yet: leaves {@code oafs} unchanged and ignores every other argument. */
    @Override
    protected void addRelations(
            final List<Oaf> oafs,
            final Document doc,
            final String type,
            final KeyValue collectedFrom,
            final DataInfo info,
            final long lastUpdateTimestamp) {
        // TODO: append the relations derived from the record to oafs.
    }

}