forked from D-Net/dnet-hadoop
refactoring of the procedure for the id generation, minor changes and addition of a comparison on the original id and the origin datasource
This commit is contained in:
parent
b260fee787
commit
d47352cbc7
|
@ -18,7 +18,7 @@ import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
|
||||||
public class DatePicker {
|
public class DatePicker {
|
||||||
|
|
||||||
private static final String DATE_PATTERN = "\\d{4}-\\d{2}-\\d{2}";
|
public static final String DATE_PATTERN = "\\d{4}-\\d{2}-\\d{2}";
|
||||||
private static final String DATE_DEFAULT_SUFFIX = "01-01";
|
private static final String DATE_DEFAULT_SUFFIX = "01-01";
|
||||||
private static final int YEAR_LB = 1300;
|
private static final int YEAR_LB = 1300;
|
||||||
private static final int YEAR_UB = Year.now().getValue() + 5;
|
private static final int YEAR_UB = Year.now().getValue() + 5;
|
||||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.dedup;
|
||||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
@ -75,8 +76,6 @@ public class DedupRecordFactory {
|
||||||
String id, Iterator<Tuple2<String, T>> entities, long ts, DataInfo dataInfo, Class<T> clazz)
|
String id, Iterator<Tuple2<String, T>> entities, long ts, DataInfo dataInfo, Class<T> clazz)
|
||||||
throws IllegalAccessException, InstantiationException {
|
throws IllegalAccessException, InstantiationException {
|
||||||
|
|
||||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
|
||||||
|
|
||||||
T entity = clazz.newInstance();
|
T entity = clazz.newInstance();
|
||||||
|
|
||||||
final Collection<String> dates = Lists.newArrayList();
|
final Collection<String> dates = Lists.newArrayList();
|
||||||
|
@ -88,9 +87,8 @@ public class DedupRecordFactory {
|
||||||
t -> {
|
t -> {
|
||||||
T duplicate = t._2();
|
T duplicate = t._2();
|
||||||
|
|
||||||
StructuredProperty bestPid = bestPid(duplicate.getPid());
|
//prepare the list of pids to use for the id generation
|
||||||
if (bestPid != null)
|
bestPids.addAll(IdGenerator.bestPidtoIdentifier(duplicate));
|
||||||
bestPids.add(new Identifier(bestPid, extractDate(duplicate, sdf), PidType.classidValueOf(bestPid.getQualifier().getClassid())));
|
|
||||||
|
|
||||||
entity.mergeFrom(duplicate);
|
entity.mergeFrom(duplicate);
|
||||||
if (ModelSupport.isSubClass(duplicate, Result.class)) {
|
if (ModelSupport.isSubClass(duplicate, Result.class)) {
|
||||||
|
@ -109,11 +107,7 @@ public class DedupRecordFactory {
|
||||||
((Result) entity).setAuthor(AuthorMerger.merge(authors));
|
((Result) entity).setAuthor(AuthorMerger.merge(authors));
|
||||||
}
|
}
|
||||||
|
|
||||||
Identifier bestPid = winnerPid(bestPids);
|
entity.setId(IdGenerator.generate(bestPids, id));
|
||||||
if (bestPid == null)
|
|
||||||
entity.setId(id);
|
|
||||||
else
|
|
||||||
entity.setId(id.split("\\|")[0] + "|" + createPrefix(bestPid.getPid().getQualifier().getClassid()) + "::" + DedupUtility.md5(bestPid.getPid().getValue()));
|
|
||||||
|
|
||||||
entity.setLastupdatetimestamp(ts);
|
entity.setLastupdatetimestamp(ts);
|
||||||
entity.setDataInfo(dataInfo);
|
entity.setDataInfo(dataInfo);
|
||||||
|
@ -121,61 +115,5 @@ public class DedupRecordFactory {
|
||||||
return entity;
|
return entity;
|
||||||
}
|
}
|
||||||
|
|
||||||
//pick the best pid from the list (consider date and pidtype)
|
|
||||||
public static Identifier winnerPid(List<Identifier> pids) {
|
|
||||||
if (pids == null || pids.size() == 0)
|
|
||||||
return null;
|
|
||||||
Optional<Identifier> bp = pids.stream()
|
|
||||||
.filter(pid -> pid.getType() != PidType.undefined)
|
|
||||||
.max(Identifier::compareTo);
|
|
||||||
return bp.orElse(null);
|
|
||||||
}
|
|
||||||
|
|
||||||
//pick the best pid from the entity
|
|
||||||
public static StructuredProperty bestPid(List<StructuredProperty> pids) {
|
|
||||||
|
|
||||||
if (pids == null || pids.size() == 0)
|
|
||||||
return null;
|
|
||||||
Optional<StructuredProperty> bp = pids.stream()
|
|
||||||
.filter(pid -> PidType.classidValueOf(pid.getQualifier().getClassid()) != PidType.undefined)
|
|
||||||
.max(Comparator.comparing(pid -> PidType.classidValueOf(pid.getQualifier().getClassid())));
|
|
||||||
|
|
||||||
return bp.orElse(null);
|
|
||||||
}
|
|
||||||
|
|
||||||
//create the prefix (length = 12): dedup_+ pidType
|
|
||||||
public static String createPrefix(String pidType) {
|
|
||||||
|
|
||||||
StringBuilder prefix = new StringBuilder("dedup_" + pidType);
|
|
||||||
|
|
||||||
while (prefix.length() < 12) {
|
|
||||||
prefix.append("_");
|
|
||||||
}
|
|
||||||
return prefix.toString().substring(0, 12);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
//extracts the date from the record. If the date is not available or is not wellformed, it returns a base date: 00-01-01
|
|
||||||
public static <T extends OafEntity> Date extractDate(T duplicate, SimpleDateFormat sdf){
|
|
||||||
|
|
||||||
String date = "2000-01-01";
|
|
||||||
if (ModelSupport.isSubClass(duplicate, Result.class)) {
|
|
||||||
Result result = (Result) duplicate;
|
|
||||||
if (isWellformed(result.getDateofacceptance())){
|
|
||||||
date = result.getDateofacceptance().getValue();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
return sdf.parse(date);
|
|
||||||
} catch (ParseException e) {
|
|
||||||
return new Date();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isWellformed(Field<String> date) {
|
|
||||||
return date != null && StringUtils.isNotBlank(date.getValue()) && date.getValue().matches("\\d{4}-\\d{2}-\\d{2}") && DatePicker.inRange(date.getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,90 @@
|
||||||
|
package eu.dnetlib.dhp.oa.dedup;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import org.apache.commons.lang.NullArgumentException;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class IdGenerator implements Serializable {
|
||||||
|
|
||||||
|
private static SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||||
|
public static String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2";
|
||||||
|
public static String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254";
|
||||||
|
|
||||||
|
//pick the best pid from the list (consider date and pidtype)
|
||||||
|
public static String generate(List<Identifier> pids, String defaultID) {
|
||||||
|
if (pids == null || pids.size() == 0)
|
||||||
|
return defaultID;
|
||||||
|
|
||||||
|
Optional<Identifier> bp = pids.stream()
|
||||||
|
.max(Identifier::compareTo);
|
||||||
|
|
||||||
|
if (bp.get().isUseOriginal() || bp.get().getPid().getValue() == null) {
|
||||||
|
return bp.get().getOriginalID().split("\\|")[0] + "|dedup_wf_001::" + DedupUtility.md5(bp.get().getOriginalID());
|
||||||
|
} else {
|
||||||
|
return bp.get().getOriginalID().split("\\|")[0] + "|" + createPrefix(bp.get().getPid().getQualifier().getClassid()) + "::" + DedupUtility.md5(bp.get().getPid().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//pick the best pid from the entity. Returns a list (length 1) to save time in the call
|
||||||
|
public static <T extends OafEntity> List<Identifier> bestPidtoIdentifier(T entity) {
|
||||||
|
|
||||||
|
if (entity.getPid() == null || entity.getPid().size() == 0)
|
||||||
|
return Lists.newArrayList(new Identifier(new StructuredProperty(), new Date(), PidType.original, entity.getCollectedfrom(), EntityType.fromClass(entity.getClass()), entity.getId()));
|
||||||
|
|
||||||
|
Optional<StructuredProperty> bp = entity.getPid().stream()
|
||||||
|
.filter(pid -> PidType.classidValueOf(pid.getQualifier().getClassid()) != PidType.undefined)
|
||||||
|
.max(Comparator.comparing(pid -> PidType.classidValueOf(pid.getQualifier().getClassid())));
|
||||||
|
|
||||||
|
return bp.map(structuredProperty ->
|
||||||
|
Lists.newArrayList(new Identifier(structuredProperty, extractDate(entity, sdf), PidType.classidValueOf(structuredProperty.getQualifier().getClassid()), entity.getCollectedfrom(), EntityType.fromClass(entity.getClass()), entity.getId()))
|
||||||
|
).orElseGet(() -> Lists.newArrayList(new Identifier(new StructuredProperty(), new Date(), PidType.original, entity.getCollectedfrom(), EntityType.fromClass(entity.getClass()), entity.getId())));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//create the prefix (length = 12): dedup_+ pidType
|
||||||
|
public static String createPrefix(String pidType) {
|
||||||
|
|
||||||
|
StringBuilder prefix = new StringBuilder("dedup_" + pidType);
|
||||||
|
|
||||||
|
while (prefix.length() < 12) {
|
||||||
|
prefix.append("_");
|
||||||
|
}
|
||||||
|
return prefix.toString().substring(0, 12);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//extracts the date from the record. If the date is not available or is not wellformed, it returns a base date: 00-01-01
|
||||||
|
public static <T extends OafEntity> Date extractDate(T duplicate, SimpleDateFormat sdf){
|
||||||
|
|
||||||
|
String date = "2000-01-01";
|
||||||
|
if (ModelSupport.isSubClass(duplicate, Result.class)) {
|
||||||
|
Result result = (Result) duplicate;
|
||||||
|
if (isWellformed(result.getDateofacceptance())){
|
||||||
|
date = result.getDateofacceptance().getValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return sdf.parse(date);
|
||||||
|
} catch (ParseException e) {
|
||||||
|
return new Date();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isWellformed(Field<String> date) {
|
||||||
|
return date != null && StringUtils.isNotBlank(date.getValue()) && date.getValue().matches(DatePicker.DATE_PATTERN) && DatePicker.inRange(date.getValue());
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,20 +1,31 @@
|
||||||
package eu.dnetlib.dhp.oa.dedup;
|
package eu.dnetlib.dhp.oa.dedup;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class Identifier implements Serializable, Comparable<Identifier>{
|
public class Identifier implements Serializable, Comparable<Identifier>{
|
||||||
|
|
||||||
StructuredProperty pid;
|
StructuredProperty pid;
|
||||||
Date date;
|
Date date;
|
||||||
PidType type;
|
PidType type;
|
||||||
|
List<KeyValue> collectedFrom;
|
||||||
|
EntityType entityType;
|
||||||
|
String originalID;
|
||||||
|
|
||||||
public Identifier(StructuredProperty pid, Date date, PidType type) {
|
boolean useOriginal = false; //to know if the top identifier won because of the alphabetical order of the original ID
|
||||||
|
|
||||||
|
public Identifier(StructuredProperty pid, Date date, PidType type, List<KeyValue> collectedFrom, EntityType entityType, String originalID) {
|
||||||
this.pid = pid;
|
this.pid = pid;
|
||||||
this.date = date;
|
this.date = date;
|
||||||
this.type = type;
|
this.type = type;
|
||||||
|
this.collectedFrom = collectedFrom;
|
||||||
|
this.entityType = entityType;
|
||||||
|
this.originalID = originalID;
|
||||||
}
|
}
|
||||||
|
|
||||||
public StructuredProperty getPid() {
|
public StructuredProperty getPid() {
|
||||||
|
@ -41,15 +52,81 @@ public class Identifier implements Serializable, Comparable<Identifier>{
|
||||||
this.type = type;
|
this.type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the collectedFrom entries stored in this identifier (the list passed to the constructor or to setCollectedFrom). */
public List<KeyValue> getCollectedFrom() {
|
||||||
|
return collectedFrom;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Replaces the collectedFrom entries stored in this identifier. */
public void setCollectedFrom(List<KeyValue> collectedFrom) {
|
||||||
|
this.collectedFrom = collectedFrom;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the entity type stored in this identifier; compareTo reads it to decide which datasource is preferred. */
public EntityType getEntityType() {
|
||||||
|
return entityType;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Replaces the entity type stored in this identifier. */
public void setEntityType(EntityType entityType) {
|
||||||
|
this.entityType = entityType;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the original id stored in this identifier (the last tie-break criterion in compareTo). */
public String getOriginalID() {
|
||||||
|
return originalID;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Replaces the original id stored in this identifier. */
public void setOriginalID(String originalID) {
|
||||||
|
this.originalID = originalID;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the useOriginal flag (false by default; compareTo sets it during the original-id tie-break). */
public boolean isUseOriginal() {
|
||||||
|
return useOriginal;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sets the useOriginal flag. */
public void setUseOriginal(boolean useOriginal) {
|
||||||
|
this.useOriginal = useOriginal;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(Identifier i) {
|
public int compareTo(Identifier i) {
|
||||||
//priority in comparisons: 1) pidtype, 2) date
|
//priority in comparisons: 1) pidtype, 2) collectedfrom (depending on the entity type) , 3) date 4) alphabetical order of the originalID
|
||||||
if (this.getType().compareTo(i.getType()) == 0){ //same type
|
if (this.getType().compareTo(i.getType()) == 0){ //same type
|
||||||
return this.getDate().compareTo(date);
|
if (entityType == EntityType.publication) {
|
||||||
|
if (isFromDatasourceID(this.collectedFrom, IdGenerator.CROSSREF_ID) && !isFromDatasourceID(i.collectedFrom, IdGenerator.CROSSREF_ID))
|
||||||
|
return 1;
|
||||||
|
if (isFromDatasourceID(i.collectedFrom, IdGenerator.CROSSREF_ID) && !isFromDatasourceID(this.collectedFrom, IdGenerator.CROSSREF_ID))
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (entityType == EntityType.dataset) {
|
||||||
|
if (isFromDatasourceID(this.collectedFrom, IdGenerator.DATACITE_ID) && !isFromDatasourceID(i.collectedFrom, IdGenerator.DATACITE_ID))
|
||||||
|
return 1;
|
||||||
|
if (isFromDatasourceID(i.collectedFrom, IdGenerator.DATACITE_ID) && !isFromDatasourceID(this.collectedFrom, IdGenerator.DATACITE_ID))
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.getDate().compareTo(date) == 0) {//same date
|
||||||
|
|
||||||
|
if (this.originalID.compareTo(i.originalID) > 0)
|
||||||
|
this.useOriginal = true;
|
||||||
|
else
|
||||||
|
i.setUseOriginal(true);
|
||||||
|
|
||||||
|
//the minus because we need to take the alphabetically lower id
|
||||||
|
return -this.originalID.compareTo(i.originalID);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
//the minus is because we need to take the elder date
|
||||||
|
return -this.getDate().compareTo(date);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return this.getType().compareTo(i.getType());
|
return this.getType().compareTo(i.getType());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isFromDatasourceID(List<KeyValue> collectedFrom, String dsId){
|
||||||
|
|
||||||
|
for(KeyValue cf: collectedFrom) {
|
||||||
|
if(cf.getKey().equals(dsId))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@ public enum PidType {
|
||||||
|
|
||||||
//from the less to the more important
|
//from the less to the more important
|
||||||
undefined,
|
undefined,
|
||||||
|
original,
|
||||||
orcid,
|
orcid,
|
||||||
ror,
|
ror,
|
||||||
grid,
|
grid,
|
||||||
|
@ -22,33 +23,3 @@ public enum PidType {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//dnet:pid_types
|
|
||||||
//"actrn"
|
|
||||||
//"nct"
|
|
||||||
//"euctr"
|
|
||||||
//"epo_id"
|
|
||||||
//"gsk"
|
|
||||||
//"GeoPass"
|
|
||||||
//"GBIF"
|
|
||||||
//"isrctn"
|
|
||||||
//"ISNI"
|
|
||||||
//"jprn"
|
|
||||||
//"mag_id"
|
|
||||||
//"oai"
|
|
||||||
//"orcid"
|
|
||||||
//"PANGAEA"
|
|
||||||
//"epo_nr_epodoc"
|
|
||||||
//"UNKNOWN"
|
|
||||||
//"VIAF"
|
|
||||||
//"arXiv"
|
|
||||||
//"doi"
|
|
||||||
//"grid"
|
|
||||||
//"info:eu-repo/dai"
|
|
||||||
//"orcidworkid"
|
|
||||||
//"pmc"
|
|
||||||
//"pmid"
|
|
||||||
//"urn"
|
|
||||||
//"who"
|
|
||||||
//"drks"
|
|
||||||
//"pdb"
|
|
|
@ -23,6 +23,8 @@ public class EntityMergerTest implements Serializable {
|
||||||
List<Tuple2<String, Publication>> publications;
|
List<Tuple2<String, Publication>> publications;
|
||||||
List<Tuple2<String, Publication>> publications2;
|
List<Tuple2<String, Publication>> publications2;
|
||||||
List<Tuple2<String, Publication>> publications3;
|
List<Tuple2<String, Publication>> publications3;
|
||||||
|
List<Tuple2<String, Publication>> publications4;
|
||||||
|
List<Tuple2<String, Publication>> publications5;
|
||||||
|
|
||||||
String testEntityBasePath;
|
String testEntityBasePath;
|
||||||
DataInfo dataInfo;
|
DataInfo dataInfo;
|
||||||
|
@ -40,7 +42,8 @@ public class EntityMergerTest implements Serializable {
|
||||||
publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class);
|
publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class);
|
||||||
publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class);
|
publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class);
|
||||||
publications3 = readSample(testEntityBasePath + "/publication_merge3.json", Publication.class);
|
publications3 = readSample(testEntityBasePath + "/publication_merge3.json", Publication.class);
|
||||||
|
publications4 = readSample(testEntityBasePath + "/publication_merge4.json", Publication.class);
|
||||||
|
publications5 = readSample(testEntityBasePath + "/publication_merge5.json", Publication.class);
|
||||||
|
|
||||||
pub_top = getTopPub(publications);
|
pub_top = getTopPub(publications);
|
||||||
|
|
||||||
|
@ -58,7 +61,7 @@ public class EntityMergerTest implements Serializable {
|
||||||
|
|
||||||
assertEquals(merged.getBestaccessright().getClassid(), "OPEN SOURCE");
|
assertEquals(merged.getBestaccessright().getClassid(), "OPEN SOURCE");
|
||||||
|
|
||||||
assertEquals(merged.getId(), "00|dedup_doi___::0968af610a356656706657e4f234b340");
|
assertEquals(merged.getId(), "50|dedup_doi___::0968af610a356656706657e4f234b340");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,7 +72,7 @@ public class EntityMergerTest implements Serializable {
|
||||||
.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
|
.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
|
||||||
|
|
||||||
// verify id
|
// verify id
|
||||||
assertEquals(pub_merged.getId(), "00|dedup_doi___::0968af610a356656706657e4f234b340");
|
assertEquals(pub_merged.getId(), "50|dedup_doi___::0968af610a356656706657e4f234b340");
|
||||||
|
|
||||||
assertEquals(pub_merged.getJournal(), pub_top.getJournal());
|
assertEquals(pub_merged.getJournal(), pub_top.getJournal());
|
||||||
assertEquals(pub_merged.getBestaccessright(), pub_top.getBestaccessright());
|
assertEquals(pub_merged.getBestaccessright(), pub_top.getBestaccessright());
|
||||||
|
@ -125,7 +128,7 @@ public class EntityMergerTest implements Serializable {
|
||||||
.entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class);
|
.entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class);
|
||||||
|
|
||||||
// verify id
|
// verify id
|
||||||
assertEquals(pub_merged.getId(), "00|dedup_doi___::0ca46ff10b2b4c756191719d85302b14");
|
assertEquals("50|dedup_doi___::0ca46ff10b2b4c756191719d85302b14", pub_merged.getId());
|
||||||
|
|
||||||
assertEquals(pub_merged.getAuthor().size(), 27);
|
assertEquals(pub_merged.getAuthor().size(), 27);
|
||||||
|
|
||||||
|
@ -138,10 +141,31 @@ public class EntityMergerTest implements Serializable {
|
||||||
.entityMerger(dedupId, publications3.iterator(), 0, dataInfo, Publication.class);
|
.entityMerger(dedupId, publications3.iterator(), 0, dataInfo, Publication.class);
|
||||||
|
|
||||||
// verify id
|
// verify id
|
||||||
assertEquals(pub_merged.getId(), "00|dedup_doi___::0ca46ff10b2b4c756191719d85302b14");
|
assertEquals( "50|dedup_doi___::0ca46ff10b2b4c756191719d85302b14", pub_merged.getId());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Merges the records of publications4 (loaded from publication_merge4.json in the setup)
// and verifies that the merged publication gets the expected "dedup_wf_001" id.
@Test
|
||||||
|
public void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException {
|
||||||
|
|
||||||
|
Publication pub_merged = DedupRecordFactory
|
||||||
|
.entityMerger(dedupId, publications4.iterator(), 0, dataInfo, Publication.class);
|
||||||
|
|
||||||
|
// verify id
|
||||||
|
assertEquals("50|dedup_wf_001::2d2bbbbcfb285e3fb3590237b79e2fa8", pub_merged.getId());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merges the records of publications5 (loaded from publication_merge5.json in the setup)
// and verifies that the merged publication gets the expected "dedup_wf_001" id.
@Test
|
||||||
|
public void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException {
|
||||||
|
|
||||||
|
Publication pub_merged = DedupRecordFactory
|
||||||
|
.entityMerger(dedupId, publications5.iterator(), 0, dataInfo, Publication.class);
|
||||||
|
|
||||||
|
// verify id
|
||||||
|
assertEquals("50|dedup_wf_001::584b89679c3ccd1015b647ec63cc2699", pub_merged.getId());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public DataInfo setDI() {
|
public DataInfo setDI() {
|
||||||
DataInfo dataInfo = new DataInfo();
|
DataInfo dataInfo = new DataInfo();
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue