dnet-hadoop/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java

58 lines
1.7 KiB
Java
Raw Normal View History

package eu.dnetlib.dhp.oa.dedup;
import static org.apache.commons.lang3.StringUtils.substringAfter;
import static org.apache.commons.lang3.StringUtils.substringBefore;
import java.io.Serializable;
import java.util.List;
import eu.dnetlib.dhp.oa.dedup.model.Identifier;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
public class IdGenerator implements Serializable {
// pick the best pid from the list (consider date and pidtype)
public static <T extends OafEntity> String generate(List<Identifier<T>> pids, String defaultID) {
2021-08-11 12:13:22 +02:00
if (pids == null || pids.isEmpty())
return defaultID;
2022-11-09 14:20:59 +01:00
return generateId(pids);
}
private static <T extends OafEntity> String generateId(List<Identifier<T>> pids) {
Identifier<T> bp = pids
.stream()
.min(Identifier::compareTo)
2021-08-11 12:13:22 +02:00
.orElseThrow(() -> new IllegalStateException("unable to generate id"));
String prefix = substringBefore(bp.getOriginalID(), "|");
String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::");
String suffix = substringAfter(bp.getOriginalID(), "::");
final String pidType = substringBefore(ns, "_");
if (PidType.isValid(pidType)) {
return prefix + "|" + dedupify(ns) + "::" + suffix;
} else {
return prefix + "|dedup_wf_001::" + suffix;
}
}
private static String dedupify(String ns) {
2021-03-29 10:07:12 +02:00
StringBuilder prefix;
if (PidType.valueOf(substringBefore(ns, "_")) == PidType.openorgs) {
prefix = new StringBuilder(substringBefore(ns, "_"));
} else {
prefix = new StringBuilder(substringBefore(ns, "_")).append("_dedup");
}
while (prefix.length() < 12) {
prefix.append("_");
}
return prefix.substring(0, 12);
}
}