conflicts with a persistent org

This commit is contained in:
Michele Artini 2022-10-11 10:40:31 +02:00
parent c990fc945b
commit c4c7179009
2 changed files with 90 additions and 32 deletions

View File

@ -12,10 +12,12 @@ import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import javax.transaction.Transactional;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
@ -476,45 +478,94 @@ public class DatabaseUtils {
@Transactional
public String fixConflictSimilars(final List<String> similarIds, final String user) {
final OffsetDateTime now = OffsetDateTime.now();
final List<OrganizationView> views =
similarIds.stream().map(organizationViewRepository::findById).filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList());
// I create a new org
final OrganizationView newOrg = new OrganizationView();
newOrg.setId(null);
newOrg.setStatus(null);
newOrg.setName(findFirstString(views, OrganizationView::getName));
newOrg.setType(findFirstString(views, OrganizationView::getType));
newOrg.setLat(findFirstNumber(views, OrganizationView::getLat));
newOrg.setLng(findFirstNumber(views, OrganizationView::getLng));
newOrg.setCity(findFirstString(views, OrganizationView::getCity));
newOrg.setCountry(findFirstString(views, OrganizationView::getCountry));
newOrg.setOtherIdentifiers(findAll(views, OrganizationView::getOtherIdentifiers));
newOrg.setOtherNames(findAll(views, OrganizationView::getOtherNames));
newOrg.setAcronyms(findAll(views, OrganizationView::getAcronyms));
newOrg.setUrls(findAll(views, OrganizationView::getUrls));
newOrg.setRelations(findAll(views, OrganizationView::getRelations));
final List<OrganizationView> persistents = views.stream().filter(v -> v.getPersistent()).collect(Collectors.toList());
newOrg.getOtherNames()
.addAll(views.stream()
if (persistents.size() > 1) {
throw new RuntimeException("Too many persintent organizations");
} else if (persistents.size() > 1) {
backupOrg(persistents.get(0), user);
return fixConflicts(persistents.get(0), views, user);
} else {
// I create a new org
final OrganizationView masterOrg = new OrganizationView();
masterOrg.setId(null);
masterOrg.setStatus(null);
return fixConflicts(masterOrg, views, user);
}
}
/**
 * Saves a copy of the given organization under a new id, with status {@code hidden}.
 *
 * <p>The backup id is the original id followed by {@code "::"} and the current epoch second.
 * The copy is obtained with {@code BeanUtils.cloneBean}, stored through
 * {@code insertOrUpdateOrganization}, and two journal entries are written: one on the original
 * id and one on the backup id.
 *
 * @param org
 *            the organization to copy
 * @param user
 *            the user recorded in the stored copy and in the journal entries
 * @return the id assigned to the backup copy
 * @throws RuntimeException
 *             wrapping any exception raised while cloning, saving or journaling
 */
private String backupOrg(final OrganizationView org, final String user) {
	final String sourceId = org.getId();
	final String copyId = sourceId + "::" + OffsetDateTime.now().toEpochSecond();

	try {
		final OrganizationView copy = (OrganizationView) BeanUtils.cloneBean(org);
		copy.setId(copyId);
		copy.setStatus(OrganizationStatus.hidden.toString());
		insertOrUpdateOrganization(copy, user, false);

		// Journal the operation on the original org...
		final String noteOnSource = "Saved a backup copy: " + copyId;
		journalEntryRepository.save(new JournalEntry(sourceId, JournalOperations.BACKUP_ORG, noteOnSource, user));

		// ...and on the backup copy itself
		final String noteOnCopy = "Saved a backup copy of " + sourceId;
		journalEntryRepository.save(new JournalEntry(copyId, JournalOperations.BACKUP_ORG, noteOnCopy, user));

		return copyId;
	} catch (final Exception e) {
		final String failure = "Error performing the backup of " + sourceId;
		log.error(failure, e);
		throw new RuntimeException(failure, e);
	}
}
private String fixConflicts(final OrganizationView masterOrg, final List<OrganizationView> orgs, final String user) {
final OffsetDateTime now = OffsetDateTime.now();
final String finalMessage = (masterOrg.getId() == null ? "New org created merging: " : "Merging in persistent org: ") +
orgs.stream()
.map(OrganizationView::getId)
.collect(Collectors.joining(", "));
masterOrg.setName(findFirstString(orgs, OrganizationView::getName));
masterOrg.setType(findFirstString(orgs, OrganizationView::getType));
masterOrg.setLat(findFirstNumber(orgs, OrganizationView::getLat));
masterOrg.setLng(findFirstNumber(orgs, OrganizationView::getLng));
masterOrg.setCity(findFirstString(orgs, OrganizationView::getCity));
masterOrg.setCountry(findFirstString(orgs, OrganizationView::getCountry));
masterOrg.setOtherIdentifiers(findAll(orgs, OrganizationView::getOtherIdentifiers));
masterOrg.setOtherNames(findAll(orgs, OrganizationView::getOtherNames));
masterOrg.setAcronyms(findAll(orgs, OrganizationView::getAcronyms));
masterOrg.setUrls(findAll(orgs, OrganizationView::getUrls));
masterOrg.setRelations(findAll(orgs, OrganizationView::getRelations, r -> !r.getType().equals(RelationType.Merged_In.toString())
&& !r.getType().equals(RelationType.Merged_In.toString())));
masterOrg.getOtherNames()
.addAll(orgs.stream()
.map(OrganizationView::getName)
.filter(StringUtils::isNotBlank)
.filter(s -> StringUtils.equalsIgnoreCase(s, newOrg.getName()))
.filter(s -> StringUtils.equalsIgnoreCase(s, masterOrg.getName()))
.map(s -> new eu.dnetlib.organizations.model.view.OtherName(s, "UNKNOWN"))
.collect(Collectors.toList()));
final String masterId = insertOrUpdateOrganization(newOrg, user, false);
final String masterId = insertOrUpdateOrganization(masterOrg, user, false);
// I hide the merged organizations
similarIds.forEach(id -> {
hideConflictOrgs(masterId, id);
journalEntryRepository.save(new JournalEntry(masterId, JournalOperations.FIX_CONFLICT, "The org has been hidded and merged in " + masterId, user));
});
orgs.stream()
.map(OrganizationView::getId)
.filter(id -> !id.equals(masterId))
.forEach(id -> {
hideConflictOrgs(masterId, id);
journalEntryRepository
.save(new JournalEntry(masterId, JournalOperations.FIX_CONFLICT, "The org has been hidded and merged in " + masterId, user));
});
// I reassign the duplicates to the new org
final List<OpenaireDuplicate> newDuplicates = similarIds.stream()
final List<OpenaireDuplicate> newDuplicates = orgs.stream()
.map(OrganizationView::getId)
.map(openaireDuplicateRepository::findByLocalId)
.flatMap(l -> l.stream())
.map(d -> new OpenaireDuplicate(masterId, d.getOaOriginalId(), d.getRelType(), d.getOaCollectedFrom()))
@ -526,18 +577,20 @@ public class DatabaseUtils {
openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now);
});
for (final String similarId : similarIds) {
orgs.forEach(org -> {
final String similarId = org.getId();
openaireConflictRepository.updateMultipleStatusAndResetGroup(similarId, SimilarityType.is_different.toString(), user, now);
}
});
for (int i = 0; i < similarIds.size(); i++) {
for (int j = i + 1; j < similarIds.size(); j++) {
openaireConflictRepository.updateStatusAndResetGroup(similarIds.get(i), similarIds.get(j), SimilarityType.is_similar.toString(), user, now);
for (int i = 0; i < orgs.size(); i++) {
for (int j = i + 1; j < orgs.size(); j++) {
openaireConflictRepository
.updateStatusAndResetGroup(orgs.get(i).getId(), orgs.get(j).getId(), SimilarityType.is_similar.toString(), user, now);
}
}
journalEntryRepository
.save(new JournalEntry(masterId, JournalOperations.FIX_CONFLICT, "New org created merging: " + StringUtils.join(similarIds, ", "), user));
.save(new JournalEntry(masterId, JournalOperations.FIX_CONFLICT, finalMessage, user));
return masterId;
}
@ -580,6 +633,10 @@ public class DatabaseUtils {
return views.stream().map(mapper).flatMap(s -> s.stream()).collect(Collectors.toCollection(LinkedHashSet::new));
}
/**
 * Collects, in encounter order and without duplicates, every element of the sets extracted from
 * the given views that satisfies the filter.
 *
 * @param views
 *            the organizations to read from
 * @param mapper
 *            extracts the set of values from each organization
 * @param filter
 *            only the elements accepted by this predicate are kept
 * @return an insertion-ordered set with the accepted elements
 */
private <T> Set<T> findAll(final List<OrganizationView> views, final Function<OrganizationView, Set<T>> mapper, final Predicate<T> filter) {
	final Set<T> accepted = new LinkedHashSet<>();
	for (final OrganizationView view : views) {
		for (final T item : mapper.apply(view)) {
			if (filter.test(item)) {
				accepted.add(item);
			}
		}
	}
	return accepted;
}
private List<Relationship> hideConflictOrgs(final String masterId, final String otherId) {
organizationRepository.updateStatus(otherId, OrganizationStatus.hidden.toString());
openaireConflictRepository.findById(new OpenaireConflictPK(masterId, otherId)).ifPresent(openaireConflictRepository::delete);

View File

@ -9,5 +9,6 @@ public enum JournalOperations {
DUPLICATES,
FIX_CONFLICT,
NO_CONFLICT,
BACKUP_ORG,
UNKNOWN
}