2020-09-29 15:31:46 +02:00
|
|
|
|
2020-07-24 20:10:47 +02:00
|
|
|
package eu.dnetlib.dhp.oa.dedup;
|
|
|
|
|
2020-11-04 15:02:02 +01:00
|
|
|
import static org.apache.commons.lang3.StringUtils.substringAfter;
|
|
|
|
import static org.apache.commons.lang3.StringUtils.substringBefore;
|
2020-09-29 15:31:46 +02:00
|
|
|
|
2020-11-04 15:02:02 +01:00
|
|
|
import java.io.Serializable;
|
|
|
|
import java.util.List;
|
2020-09-29 15:31:46 +02:00
|
|
|
|
2020-10-20 12:19:46 +02:00
|
|
|
import eu.dnetlib.dhp.oa.dedup.model.Identifier;
|
2020-07-24 20:10:47 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
2020-11-04 15:02:02 +01:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
|
2020-07-24 20:10:47 +02:00
|
|
|
|
|
|
|
public class IdGenerator implements Serializable {
|
|
|
|
|
2020-09-29 15:31:46 +02:00
|
|
|
// pick the best pid from the list (consider date and pidtype)
|
2020-11-04 15:02:02 +01:00
|
|
|
public static <T extends OafEntity> String generate(List<Identifier<T>> pids, String defaultID) {
|
2020-09-29 15:31:46 +02:00
|
|
|
if (pids == null || pids.size() == 0)
|
|
|
|
return defaultID;
|
|
|
|
|
2020-11-04 15:02:02 +01:00
|
|
|
Identifier<T> bp = pids
|
2020-09-29 15:31:46 +02:00
|
|
|
.stream()
|
2020-11-04 15:02:02 +01:00
|
|
|
.min(Identifier::compareTo)
|
|
|
|
.get();
|
2020-09-29 15:31:46 +02:00
|
|
|
|
2020-11-04 15:02:02 +01:00
|
|
|
String prefix = substringBefore(bp.getOriginalID(), "|");
|
|
|
|
String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::");
|
|
|
|
String suffix = substringAfter(bp.getOriginalID(), "::");
|
2020-09-29 15:31:46 +02:00
|
|
|
|
2020-11-04 15:02:02 +01:00
|
|
|
final String pidType = substringBefore(ns, "_");
|
|
|
|
if (PidType.isValid(pidType)) {
|
|
|
|
return prefix + "|" + dedupify(ns) + "::" + suffix;
|
|
|
|
} else {
|
|
|
|
return prefix + "|dedup_wf_001::" + suffix;
|
2020-10-09 09:30:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-04 15:02:02 +01:00
|
|
|
private static String dedupify(String ns) {
|
2021-03-29 10:07:12 +02:00
|
|
|
|
|
|
|
StringBuilder prefix;
|
|
|
|
if (PidType.valueOf(substringBefore(ns, "_")) == PidType.openorgs) {
|
|
|
|
prefix = new StringBuilder(substringBefore(ns, "_"));
|
|
|
|
} else {
|
|
|
|
prefix = new StringBuilder(substringBefore(ns, "_")).append("_dedup");
|
|
|
|
}
|
|
|
|
|
2020-09-29 15:31:46 +02:00
|
|
|
while (prefix.length() < 12) {
|
|
|
|
prefix.append("_");
|
|
|
|
}
|
2020-11-04 15:02:02 +01:00
|
|
|
return prefix.substring(0, 12);
|
2020-09-29 15:31:46 +02:00
|
|
|
}
|
|
|
|
|
2020-07-24 20:10:47 +02:00
|
|
|
}
|