2020-07-22 17:29:48 +02:00
|
|
|
|
2020-10-06 16:44:51 +02:00
|
|
|
package eu.dnetlib.dhp.oa.dedup.model;
|
2020-07-22 17:29:48 +02:00
|
|
|
|
|
|
|
import java.io.Serializable;
|
|
|
|
import java.util.Date;
|
2020-07-24 20:10:47 +02:00
|
|
|
import java.util.List;
|
2020-10-06 16:21:34 +02:00
|
|
|
import java.util.Set;
|
|
|
|
import java.util.stream.Collectors;
|
2020-07-22 17:29:48 +02:00
|
|
|
|
2020-10-06 16:44:51 +02:00
|
|
|
import eu.dnetlib.dhp.oa.dedup.IdGenerator;
|
2020-09-29 15:31:46 +02:00
|
|
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
|
|
|
|
|
|
public class Identifier implements Serializable, Comparable<Identifier> {
|
|
|
|
|
|
|
|
StructuredProperty pid;
|
|
|
|
Date date;
|
|
|
|
PidType type;
|
|
|
|
List<KeyValue> collectedFrom;
|
|
|
|
EntityType entityType;
|
|
|
|
String originalID;
|
|
|
|
|
|
|
|
boolean useOriginal = false; // to know if the top identifier won because of the alphabetical order of the original
|
|
|
|
// ID
|
|
|
|
|
|
|
|
public Identifier(StructuredProperty pid, Date date, PidType type, List<KeyValue> collectedFrom,
|
|
|
|
EntityType entityType, String originalID) {
|
|
|
|
this.pid = pid;
|
|
|
|
this.date = date;
|
|
|
|
this.type = type;
|
|
|
|
this.collectedFrom = collectedFrom;
|
|
|
|
this.entityType = entityType;
|
|
|
|
this.originalID = originalID;
|
|
|
|
}
|
|
|
|
|
|
|
|
public StructuredProperty getPid() {
|
|
|
|
return pid;
|
|
|
|
}
|
|
|
|
|
2020-10-06 16:21:34 +02:00
|
|
|
public void setPid(StructuredProperty pid) {
|
2020-09-29 15:31:46 +02:00
|
|
|
this.pid = pid;
|
|
|
|
}
|
|
|
|
|
|
|
|
public Date getDate() {
|
|
|
|
return date;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setDate(Date date) {
|
|
|
|
this.date = date;
|
|
|
|
}
|
|
|
|
|
|
|
|
public PidType getType() {
|
|
|
|
return type;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setType(PidType type) {
|
|
|
|
this.type = type;
|
|
|
|
}
|
|
|
|
|
|
|
|
public List<KeyValue> getCollectedFrom() {
|
|
|
|
return collectedFrom;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setCollectedFrom(List<KeyValue> collectedFrom) {
|
|
|
|
this.collectedFrom = collectedFrom;
|
|
|
|
}
|
|
|
|
|
|
|
|
public EntityType getEntityType() {
|
|
|
|
return entityType;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setEntityType(EntityType entityType) {
|
|
|
|
this.entityType = entityType;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getOriginalID() {
|
|
|
|
return originalID;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setOriginalID(String originalID) {
|
|
|
|
this.originalID = originalID;
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean isUseOriginal() {
|
|
|
|
return useOriginal;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setUseOriginal(boolean useOriginal) {
|
|
|
|
this.useOriginal = useOriginal;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public int compareTo(Identifier i) {
|
|
|
|
// priority in comparisons: 1) pidtype, 2) collectedfrom (depending on the entity type) , 3) date 4)
|
|
|
|
// alphabetical order of the originalID
|
2020-10-06 16:21:34 +02:00
|
|
|
|
|
|
|
Set<String> lKeys = this.collectedFrom.stream().map(KeyValue::getKey).collect(Collectors.toSet());
|
|
|
|
Set<String> rKeys = i.getCollectedFrom().stream().map(KeyValue::getKey).collect(Collectors.toSet());
|
|
|
|
|
2020-09-29 15:31:46 +02:00
|
|
|
if (this.getType().compareTo(i.getType()) == 0) { // same type
|
|
|
|
if (entityType == EntityType.publication) {
|
2020-10-06 16:21:34 +02:00
|
|
|
if (isFromDatasourceID(lKeys, IdGenerator.CROSSREF_ID)
|
|
|
|
&& !isFromDatasourceID(rKeys, IdGenerator.CROSSREF_ID))
|
2020-09-29 15:31:46 +02:00
|
|
|
return 1;
|
2020-10-06 16:21:34 +02:00
|
|
|
if (isFromDatasourceID(rKeys, IdGenerator.CROSSREF_ID)
|
|
|
|
&& !isFromDatasourceID(lKeys, IdGenerator.CROSSREF_ID))
|
2020-09-29 15:31:46 +02:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (entityType == EntityType.dataset) {
|
2020-10-06 16:21:34 +02:00
|
|
|
if (isFromDatasourceID(lKeys, IdGenerator.DATACITE_ID)
|
|
|
|
&& !isFromDatasourceID(rKeys, IdGenerator.DATACITE_ID))
|
2020-09-29 15:31:46 +02:00
|
|
|
return 1;
|
2020-10-06 16:21:34 +02:00
|
|
|
if (isFromDatasourceID(rKeys, IdGenerator.DATACITE_ID)
|
|
|
|
&& !isFromDatasourceID(lKeys, IdGenerator.DATACITE_ID))
|
2020-09-29 15:31:46 +02:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2020-10-06 16:21:34 +02:00
|
|
|
if (this.getDate().compareTo(i.getDate()) == 0) {// same date
|
2020-09-29 15:31:46 +02:00
|
|
|
|
2020-10-09 09:30:23 +02:00
|
|
|
if (this.originalID.compareTo(i.originalID) < 0)
|
2020-09-29 15:31:46 +02:00
|
|
|
this.useOriginal = true;
|
|
|
|
else
|
|
|
|
i.setUseOriginal(true);
|
|
|
|
|
|
|
|
// the minus because we need to take the alphabetically lower id
|
|
|
|
return -this.originalID.compareTo(i.originalID);
|
|
|
|
} else
|
|
|
|
// the minus is because we need to take the elder date
|
2020-10-06 16:21:34 +02:00
|
|
|
return -this.getDate().compareTo(i.getDate());
|
2020-09-29 15:31:46 +02:00
|
|
|
} else {
|
|
|
|
return this.getType().compareTo(i.getType());
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2020-10-06 16:21:34 +02:00
|
|
|
public boolean isFromDatasourceID(Set<String> collectedFrom, String dsId) {
|
|
|
|
return collectedFrom.contains(dsId);
|
2020-09-29 15:31:46 +02:00
|
|
|
}
|
2020-07-22 17:29:48 +02:00
|
|
|
}
|