forked from antonis.lempesis/dnet-hadoop
common IdentifierFactory in use on the mapping from the aggregator data; merge the entities sharing the same id; code formatting
This commit is contained in:
parent
34f1d0904b
commit
266bf1a221
|
@ -6,18 +6,19 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentFactory;
|
import org.dom4j.DocumentFactory;
|
||||||
import org.dom4j.DocumentHelper;
|
import org.dom4j.DocumentHelper;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
|
|
||||||
public abstract class AbstractMdRecordToOafMapper {
|
public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
|
@ -152,7 +153,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return oafs;
|
return oafs;
|
||||||
}
|
}
|
||||||
|
|
||||||
private OafEntity createEntity(Document doc, String type, List<Instance> instances, KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) {
|
private OafEntity createEntity(Document doc, String type, List<Instance> instances, KeyValue collectedFrom,
|
||||||
|
DataInfo info, long lastUpdateTimestamp) {
|
||||||
switch (type.toLowerCase()) {
|
switch (type.toLowerCase()) {
|
||||||
case "publication":
|
case "publication":
|
||||||
final Publication p = new Publication();
|
final Publication p = new Publication();
|
||||||
|
|
|
@ -29,16 +29,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -124,7 +115,27 @@ public class GenerateEntitiesApplication {
|
||||||
|
|
||||||
private static Oaf merge(final Oaf o1, final Oaf o2) {
|
private static Oaf merge(final Oaf o1, final Oaf o2) {
|
||||||
if (ModelSupport.isSubClass(o1, OafEntity.class)) {
|
if (ModelSupport.isSubClass(o1, OafEntity.class)) {
|
||||||
((OafEntity) o1).mergeFrom((OafEntity) o2);
|
if (ModelSupport.isSubClass(o1, Result.class)) {
|
||||||
|
if (ModelSupport.isSubClass(o1, Publication.class)) {
|
||||||
|
((Publication) o1).mergeFrom((Publication) o2);
|
||||||
|
} else if (ModelSupport.isSubClass(o1, Dataset.class)) {
|
||||||
|
((Dataset) o1).mergeFrom((Dataset) o2);
|
||||||
|
} else if (ModelSupport.isSubClass(o1, Software.class)) {
|
||||||
|
((Software) o1).mergeFrom((Software) o2);
|
||||||
|
} else if (ModelSupport.isSubClass(o1, OtherResearchProduct.class)) {
|
||||||
|
((OtherResearchProduct) o1).mergeFrom((OtherResearchProduct) o2);
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("invalid Result subtype:" + o1.getClass().getCanonicalName());
|
||||||
|
}
|
||||||
|
} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
|
||||||
|
((Datasource) o1).mergeFrom((Datasource) o2);
|
||||||
|
} else if (ModelSupport.isSubClass(o1, Organization.class)) {
|
||||||
|
((Organization) o1).mergeFrom((Organization) o2);
|
||||||
|
} else if (ModelSupport.isSubClass(o1, Project.class)) {
|
||||||
|
((Project) o1).mergeFrom((Project) o2);
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
|
||||||
|
}
|
||||||
} else if (ModelSupport.isSubClass(o1, Relation.class)) {
|
} else if (ModelSupport.isSubClass(o1, Relation.class)) {
|
||||||
((Relation) o1).mergeFrom((Relation) o2);
|
((Relation) o1).mergeFrom((Relation) o2);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -10,7 +10,6 @@ import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Element;
|
import org.dom4j.Element;
|
||||||
|
@ -20,6 +19,7 @@ import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.PacePerson;
|
import eu.dnetlib.dhp.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
|
@ -248,8 +248,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Oaf> addOtherResultRels(
|
protected List<Oaf> addOtherResultRels(
|
||||||
final Document doc,
|
final Document doc,
|
||||||
final OafEntity entity) {
|
final OafEntity entity) {
|
||||||
|
|
||||||
final String docId = entity.getId();
|
final String docId = entity.getId();
|
||||||
final List<Oaf> res = new ArrayList<>();
|
final List<Oaf> res = new ArrayList<>();
|
||||||
|
|
|
@ -12,8 +12,6 @@ import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
@ -22,6 +20,8 @@ import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.PacePerson;
|
import eu.dnetlib.dhp.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
|
@ -306,8 +306,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Oaf> addOtherResultRels(
|
protected List<Oaf> addOtherResultRels(
|
||||||
final Document doc,
|
final Document doc,
|
||||||
final OafEntity entity) {
|
final OafEntity entity) {
|
||||||
|
|
||||||
final String docId = entity.getId();
|
final String docId = entity.getId();
|
||||||
|
|
||||||
|
|
|
@ -11,8 +11,6 @@ import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
@ -26,6 +24,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
|
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
|
Loading…
Reference in New Issue