master #2

Merged
sandro.labruzzo merged 16 commits from michele.artini/dnet-hadoop:master into master 2020-02-17 10:43:09 +01:00
4 changed files with 25 additions and 10 deletions
Showing only changes of commit 80cb52593f - Show all commits

View File

@ -111,6 +111,10 @@ public class AbstractMigrationExecutor implements Closeable {
return Arrays.stream(values).map(v -> field(v, info)).filter(Objects::nonNull).collect(Collectors.toList()); return Arrays.stream(values).map(v -> field(v, info)).filter(Objects::nonNull).collect(Collectors.toList());
} }
/**
 * Converts a list of strings into a list of fields.
 * Each element of {@code values} is passed to {@code field(value, info)}; results that are
 * {@code null} are left out, and the remaining fields keep the order of {@code values}.
 *
 * @param info   the data info handed to every {@code field} call
 * @param values the strings to convert
 * @return a new list holding the non-null fields
 */
public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
    // Fully qualified so this block does not depend on an ArrayList import in the file.
    final List<Field<String>> fields = new java.util.ArrayList<>();
    for (final String value : values) {
        final Field<String> candidate = field(value, info);
        if (candidate != null) {
            fields.add(candidate);
        }
    }
    return fields;
}
public static Qualifier qualifier(final String classid, final String classname, final String schemeid, final String schemename) { public static Qualifier qualifier(final String classid, final String classname, final String schemeid, final String schemename) {
final Qualifier q = new Qualifier(); final Qualifier q = new Qualifier();
q.setClassid(classid); q.setClassid(classid);

View File

@ -385,6 +385,8 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
protected OAIProvenance prepareOAIprovenance(final Document doc) { protected OAIProvenance prepareOAIprovenance(final Document doc) {
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
if (n == null) { return null; }
final String identifier = n.valueOf("./*[local-name()='identifier']"); final String identifier = n.valueOf("./*[local-name()='identifier']");
final String baseURL = n.valueOf("./*[local-name()='baseURL']");; final String baseURL = n.valueOf("./*[local-name()='baseURL']");;
final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");; final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");;
@ -393,6 +395,7 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
final String harvestDate = n.valueOf("@harvestDate");; final String harvestDate = n.valueOf("@harvestDate");;
return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate); return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
} }
protected DataInfo prepareDataInfo(final Document doc) { protected DataInfo prepareDataInfo(final Document doc) {
@ -416,7 +419,7 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
} }
/**
 * Builds a list of string fields from the values that {@code prepareListString(node, xpath)}
 * extracts, passing them to {@code listFields} together with {@code info}.
 *
 * NOTE(review): this is a side-by-side diff rendering, so each line below shows two variants.
 * On the changed line, the first variant casts the result of the no-argument
 * {@code List.toArray()} to {@code String[]}; that method returns an {@code Object[]}, so the
 * cast fails at runtime with a ClassCastException. The second variant passes the list directly.
 *
 * @param node  the node on which the xpath is evaluated
 * @param xpath the expression selecting the values
 * @param info  the data info forwarded to {@code listFields}
 * @return the fields produced by {@code listFields}
 */
protected List<Field<String>> prepareListFields(final Node node, final String xpath, final DataInfo info) { protected List<Field<String>> prepareListFields(final Node node, final String xpath, final DataInfo info) {
return listFields(info, (String[]) prepareListString(node, xpath).toArray()); return listFields(info, prepareListString(node, xpath));
} }
protected List<String> prepareListString(final Node node, final String xpath) { protected List<String> prepareListString(final Node node, final String xpath) {

View File

@ -2,6 +2,7 @@ package eu.dnetlib.dhp.migration;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
@ -35,7 +36,7 @@ public class MdstoreClient implements Closeable {
public Map<String, String> validCollections(final String mdFormat, final String mdLayout, final String mdInterpretation) { public Map<String, String> validCollections(final String mdFormat, final String mdLayout, final String mdInterpretation) {
final Map<String, String> transactions = new HashMap<>(); final Map<String, String> transactions = new HashMap<>();
for (final Document entry : getColl(db, COLL_METADATA_MANAGER).find()) { for (final Document entry : getColl(db, COLL_METADATA_MANAGER, true).find()) {
final String mdId = entry.getString("mdId"); final String mdId = entry.getString("mdId");
final String currentId = entry.getString("currentId"); final String currentId = entry.getString("currentId");
if (StringUtils.isNoneBlank(mdId, currentId)) { if (StringUtils.isNoneBlank(mdId, currentId)) {
@ -44,7 +45,7 @@ public class MdstoreClient implements Closeable {
} }
final Map<String, String> res = new HashMap<>(); final Map<String, String> res = new HashMap<>();
for (final Document entry : getColl(db, COLL_METADATA).find()) { for (final Document entry : getColl(db, COLL_METADATA, true).find()) {
if (entry.getString("format").equals(mdFormat) && entry.getString("layout").equals(mdLayout) if (entry.getString("format").equals(mdFormat) && entry.getString("layout").equals(mdLayout)
&& entry.getString("interpretation").equals(mdInterpretation) && transactions.containsKey(entry.getString("mdId"))) { && entry.getString("interpretation").equals(mdInterpretation) && transactions.containsKey(entry.getString("mdId"))) {
res.put(entry.getString("mdId"), transactions.get(entry.getString("mdId"))); res.put(entry.getString("mdId"), transactions.get(entry.getString("mdId")));
@ -63,17 +64,23 @@ public class MdstoreClient implements Closeable {
return client.getDatabase(dbName); return client.getDatabase(dbName);
} }
/**
 * Returns the collection named {@code collName} from {@code db}.
 * When the name is not among {@code db.listCollectionNames()}, a warning is logged. The
 * two-argument variant then always throws a RuntimeException; the three-argument variant
 * throws only when {@code abortIfMissing} is true and returns {@code null} otherwise.
 *
 * NOTE(review): this is a side-by-side diff rendering; lines that appear twice show both
 * variants, lines that appear once exist only in the three-argument variant.
 */
private MongoCollection<Document> getColl(final MongoDatabase db, final String collName) { private MongoCollection<Document> getColl(final MongoDatabase db, final String collName, final boolean abortIfMissing) {
if (!Iterables.contains(db.listCollectionNames(), collName)) { if (!Iterables.contains(db.listCollectionNames(), collName)) {
// NOTE(review): the message is formatted twice. The outer String.format re-parses the
// finished text, so a '%' in collName or in the database name would be read as a format
// specifier. A single String.format call would be enough.
final String err = String.format(String.format("Missing collection '%s' in database '%s'", collName, db.getName())); final String err = String.format(String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
log.warn(err); log.warn(err);
if (abortIfMissing) {
throw new RuntimeException(err); throw new RuntimeException(err);
} else {
// Callers of the three-argument variant must handle a null result.
return null;
}
} }
return db.getCollection(collName); return db.getCollection(collName);
} }
public Iterable<String> listRecords(final String coll) { public Iterable<String> listRecords(final String collName) {
return () -> StreamSupport.stream(getColl(db, coll).find().spliterator(), false) final MongoCollection<Document> coll = getColl(db, collName, false);
return coll == null ? new ArrayList<>()
: () -> StreamSupport.stream(coll.find().spliterator(), false)
.filter(e -> e.containsKey("body")) .filter(e -> e.containsKey("body"))
.map(e -> e.getString("body")) .map(e -> e.getString("body"))
.iterator(); .iterator();

View File

@ -34,6 +34,7 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {
/**
 * Adds namespace prefix mappings to {@code nsContext}.
 * Maps the prefix "dc" to "http://datacite.org/schema/kernel-3". The call to the superclass
 * implementation appears on one side of this diff rendering only.
 *
 * @param nsContext the prefix-to-namespace-URI map to fill
 */
@Override @Override
protected void registerNamespaces(final Map<String, String> nsContext) { protected void registerNamespaces(final Map<String, String> nsContext) {
super.registerNamespaces(nsContext);
nsContext.put("dc", "http://datacite.org/schema/kernel-3"); nsContext.put("dc", "http://datacite.org/schema/kernel-3");
} }