dnet-applications/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/utils/DatabaseUtils.java

692 lines
27 KiB
Java
Raw Normal View History

2020-07-03 12:09:22 +02:00
package eu.dnetlib.organizations.utils;
import java.time.OffsetDateTime;
2021-05-05 15:12:53 +02:00
import java.util.ArrayList;
2020-07-03 12:09:22 +02:00
import java.util.Arrays;
import java.util.HashMap;
2020-10-19 15:12:08 +02:00
import java.util.LinkedHashSet;
2020-07-03 12:09:22 +02:00
import java.util.List;
import java.util.Map;
2020-10-19 15:12:08 +02:00
import java.util.Objects;
import java.util.Optional;
2020-07-03 12:09:22 +02:00
import java.util.Set;
2021-04-19 16:00:29 +02:00
import java.util.UUID;
2020-10-19 15:12:08 +02:00
import java.util.function.Function;
2022-10-11 10:40:31 +02:00
import java.util.function.Predicate;
2020-07-03 12:09:22 +02:00
import java.util.stream.Collectors;
import javax.transaction.Transactional;
2022-10-11 10:40:31 +02:00
import org.apache.commons.beanutils.BeanUtils;
2021-11-09 11:28:03 +01:00
import org.apache.commons.codec.digest.DigestUtils;
2020-10-05 12:16:49 +02:00
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
2020-07-03 12:09:22 +02:00
import org.springframework.beans.factory.annotation.Autowired;
2022-01-31 14:35:21 +01:00
import org.springframework.cache.annotation.CacheEvict;
2020-07-03 12:09:22 +02:00
import org.springframework.cache.annotation.Cacheable;
import org.springframework.jdbc.core.BeanPropertyRowMapper;
2020-07-03 12:09:22 +02:00
import org.springframework.jdbc.core.JdbcTemplate;
2021-05-05 15:12:53 +02:00
import org.springframework.scheduling.annotation.Scheduled;
2020-07-03 12:09:22 +02:00
import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestBody;
import eu.dnetlib.organizations.controller.UserRole;
import eu.dnetlib.organizations.model.Acronym;
2021-04-09 12:17:11 +02:00
import eu.dnetlib.organizations.model.JournalEntry;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.model.OpenaireConflictPK;
2020-09-29 15:31:56 +02:00
import eu.dnetlib.organizations.model.OpenaireDuplicate;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.model.Organization;
import eu.dnetlib.organizations.model.OtherIdentifier;
import eu.dnetlib.organizations.model.OtherName;
2022-09-26 10:29:15 +02:00
import eu.dnetlib.organizations.model.PersistentOrganization;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.model.Relationship;
import eu.dnetlib.organizations.model.Url;
import eu.dnetlib.organizations.model.User;
import eu.dnetlib.organizations.model.UserCountry;
import eu.dnetlib.organizations.model.utils.BrowseEntry;
import eu.dnetlib.organizations.model.utils.OrganizationConflict;
2020-10-13 14:48:04 +02:00
import eu.dnetlib.organizations.model.utils.TempBrowseEntry;
2020-10-08 15:10:11 +02:00
import eu.dnetlib.organizations.model.utils.VocabularyTerm;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.model.view.OrganizationView;
2022-09-26 10:29:15 +02:00
import eu.dnetlib.organizations.model.view.PersistentOrganizationView;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.model.view.UserView;
import eu.dnetlib.organizations.repository.AcronymRepository;
2021-04-09 12:17:11 +02:00
import eu.dnetlib.organizations.repository.JournalEntryRepository;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.repository.OpenaireConflictRepository;
2020-09-29 15:31:56 +02:00
import eu.dnetlib.organizations.repository.OpenaireDuplicateRepository;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.repository.OrganizationRepository;
import eu.dnetlib.organizations.repository.OtherIdentifierRepository;
import eu.dnetlib.organizations.repository.OtherNameRepository;
2022-09-26 10:29:15 +02:00
import eu.dnetlib.organizations.repository.PersistentOrganizationRepository;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.repository.RelationshipRepository;
import eu.dnetlib.organizations.repository.UrlRepository;
import eu.dnetlib.organizations.repository.UserCountryRepository;
import eu.dnetlib.organizations.repository.UserRepository;
2020-10-19 15:12:08 +02:00
import eu.dnetlib.organizations.repository.readonly.OrganizationViewRepository;
2022-09-26 10:29:15 +02:00
import eu.dnetlib.organizations.repository.readonly.PersistentOrganizationViewRepository;
2020-07-03 12:09:22 +02:00
@Component
public class DatabaseUtils {
@Autowired
private AcronymRepository acronymRepository;
@Autowired
private OrganizationRepository organizationRepository;
@Autowired
private OtherIdentifierRepository otherIdentifierRepository;
@Autowired
private OtherNameRepository otherNameRepository;
@Autowired
private UrlRepository urlRepository;
@Autowired
private RelationshipRepository relationshipRepository;
@Autowired
private UserRepository userRepository;
@Autowired
private UserCountryRepository userCountryRepository;
@Autowired
private OpenaireConflictRepository openaireConflictRepository;
@Autowired
2020-09-29 15:31:56 +02:00
private OpenaireDuplicateRepository openaireDuplicateRepository;
2020-10-19 15:12:08 +02:00
@Autowired
private OrganizationViewRepository organizationViewRepository;
2021-04-09 12:17:11 +02:00
@Autowired
private JournalEntryRepository journalEntryRepository;
2022-09-26 10:29:15 +02:00
@Autowired
private PersistentOrganizationRepository persistentOrganizationRepository;
@Autowired
private PersistentOrganizationViewRepository persistentOrganizationViewRepository;
2020-10-19 15:12:08 +02:00
2020-09-29 15:31:56 +02:00
@Autowired
2020-07-03 12:09:22 +02:00
private JdbcTemplate jdbcTemplate;
private static final Log log = LogFactory.getLog(DatabaseUtils.class);
2020-07-03 12:09:22 +02:00
/**
 * Vocabulary tables that can be listed with {@code listValuesOfVocabularyTable}.
 *
 * The constant name is concatenated verbatim into the SQL statement as the
 * table name, so every constant must keep the exact spelling used here.
 */
public enum VocabularyTable {
    languages,
    countries,
    org_types,
    id_types,
    rel_types,
    simrel_types
}
@Transactional
2020-10-19 15:12:08 +02:00
public String insertOrUpdateOrganization(final OrganizationView orgView, final String user, final boolean isSimpleUser) {
2021-04-19 16:00:29 +02:00
final String oldId = StringUtils.isNotBlank(orgView.getId()) ? new String(orgView.getId()) : null;
final String oldStatus = oldId != null ? organizationRepository.findById(oldId)
2020-10-19 15:12:08 +02:00
.map(Organization::getStatus)
2020-10-20 16:13:14 +02:00
.orElse(null) : null;
2020-10-19 15:12:08 +02:00
final boolean alreadyApproved = StringUtils.equals(oldStatus, OrganizationStatus.approved.toString());
2020-10-05 12:16:49 +02:00
2020-10-19 15:12:08 +02:00
final String newStatus;
if (!isSimpleUser) { // IS ADMIN
newStatus = OrganizationStatus.approved.toString();
} else if (isSimpleUser && oldStatus == null) {
newStatus = OrganizationStatus.suggested.toString();
} else if (isSimpleUser && alreadyApproved) {
newStatus = OrganizationStatus.approved.toString();
} else {
throw new RuntimeException("User not authorized");
}
2020-10-20 12:03:23 +02:00
if (oldId == null || !oldId.startsWith(OpenOrgsConstants.OPENORGS_PREFIX)) {
2021-04-19 16:00:29 +02:00
if (isSimpleUser) {
final String pendingId = OpenOrgsConstants.OPENORGS_PENDING_PREFIX + UUID.randomUUID();
orgView.setId(pendingId);
// to override the generation strategy of the ID
2022-10-11 12:36:02 +02:00
organizationRepository.prepareOrgWithId(pendingId);
2021-04-19 16:00:29 +02:00
} else {
orgView.setId(null); // The ID is generated by the DB
}
2020-10-05 12:16:49 +02:00
}
2020-10-07 17:04:29 +02:00
final Organization org = new Organization(orgView.getId(),
orgView.getName(),
orgView.getType(),
orgView.getLat(), orgView.getLng(),
orgView.getCity(), orgView.getCountry(),
2021-04-06 17:45:26 +02:00
newStatus,
orgView.getEcLegalBody(),
orgView.getEcLegalPerson(), orgView.getEcNonProfit(), orgView.getEcResearchOrganization(), orgView.getEcHigherEducation(),
orgView.getEcInternationalOrganizationEurInterests(), orgView.getEcInternationalOrganization(), orgView.getEcEnterprise(),
orgView.getEcSmeValidated(), orgView.getEcNutscode());
2020-07-03 12:09:22 +02:00
2020-10-07 17:04:29 +02:00
final String newId = organizationRepository.save(org).getId();
2020-07-03 12:09:22 +02:00
2020-10-20 12:03:23 +02:00
final OffsetDateTime now = OffsetDateTime.now();
2020-07-03 12:09:22 +02:00
2020-10-20 12:03:23 +02:00
if (StringUtils.equals(newId, oldId)) {
makeRelations(newId, orgView, true);
2020-07-03 12:09:22 +02:00
} else {
2020-10-20 12:03:23 +02:00
organizationRepository.updateCreationDate(newId, user, now);
makeRelations(newId, orgView, false);
if (oldId != null) {
2021-04-19 16:00:29 +02:00
final List<OpenaireDuplicate> dups = openaireDuplicateRepository.findByLocalId(oldId)
2020-10-21 12:27:14 +02:00
.stream()
2021-11-09 11:28:03 +01:00
.map(d -> prepareNewDuplicate(newId, oldId, d))
2021-04-19 16:00:29 +02:00
.collect(Collectors.toList());
2020-10-21 12:27:14 +02:00
openaireDuplicateRepository.saveAll(dups);
2021-04-19 16:00:29 +02:00
2020-10-28 09:48:46 +01:00
dups.forEach(d -> {
openaireDuplicateRepository.updateCreatedByIfMissing(d.getLocalId(), d.getOaOriginalId(), user);
openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now);
});
2020-10-21 12:27:14 +02:00
2021-04-19 16:00:29 +02:00
if (oldId.startsWith(OpenOrgsConstants.OPENORGS_PENDING_PREFIX)) {
organizationRepository.deleteById(oldId);
}
2020-10-20 12:03:23 +02:00
}
2020-07-03 12:09:22 +02:00
}
2020-10-20 12:03:23 +02:00
2021-01-18 16:00:14 +01:00
organizationRepository.updateModificationDate(newId, user, now);
2021-04-09 12:17:11 +02:00
JournalOperations op = JournalOperations.UNKNOWN;
String message = "-";
if (newStatus.equals(OrganizationStatus.suggested.toString())) {
if (oldStatus == null) {
op = JournalOperations.NEW_SUGG_ORG;
message = "Created a new suggested org";
} else if (oldStatus != null) {
op = JournalOperations.EDIT_SUGG_ORG;
message = "Metadata updated";
}
} else if (newStatus.equals(OrganizationStatus.approved.toString())) {
if (oldStatus == null) {
op = JournalOperations.NEW_ORG;
message = "Created a new organization";
} else if (oldStatus.equals(OrganizationStatus.suggested.toString())) {
op = JournalOperations.APPROVE_SUGG_ORG;
message = "Approved the suggested org: " + oldId;
} else {
op = JournalOperations.EDIT_ORG;
message = "Metadata updated";
}
} else {
// IMPOSSIBLE ???
}
journalEntryRepository.save(new JournalEntry(newId, op, message, user));
2020-10-20 12:03:23 +02:00
return newId;
2020-07-03 12:09:22 +02:00
}
2021-11-09 11:28:03 +01:00
/**
 * Builds the duplicate that re-attaches an existing duplicate to a new local organization.
 *
 * The relation type is is_similar only when the new id has the OpenOrgs prefix,
 * the old id has the pending prefix, and the remainder of the old id equals
 * (ignoring case) the MD5 hex digest of the original OpenAIRE id. In every
 * other case it is suggested.
 *
 * @param newId the id of the new local organization
 * @param oldId the id of the previous local organization (may be null)
 * @param old   the duplicate to copy the OpenAIRE fields from
 * @return a new, unsaved duplicate
 */
private OpenaireDuplicate prepareNewDuplicate(final String newId, final String oldId, final OpenaireDuplicate old) {
    final OpenaireDuplicate dup = new OpenaireDuplicate();
    dup.setLocalId(newId);
    dup.setOaOriginalId(old.getOaOriginalId());
    dup.setOaCollectedFrom(old.getOaCollectedFrom());

    final boolean pendingBuiltFromThisRecord = oldId != null
        && newId.startsWith(OpenOrgsConstants.OPENORGS_PREFIX)
        && oldId.startsWith(OpenOrgsConstants.OPENORGS_PENDING_PREFIX)
        && StringUtils.substringAfter(oldId, OpenOrgsConstants.OPENORGS_PENDING_PREFIX)
            .equalsIgnoreCase(DigestUtils.md5Hex(dup.getOaOriginalId()));

    final String relType = pendingBuiltFromThisRecord
        ? SimilarityType.is_similar.toString()
        : SimilarityType.suggested.toString();
    dup.setRelType(relType);

    return dup;
}
2020-09-29 15:31:56 +02:00
@Transactional
2020-10-19 15:12:08 +02:00
public void saveDuplicates(final List<OpenaireDuplicate> simrels, final String user) {
2020-09-29 15:31:56 +02:00
final OffsetDateTime now = OffsetDateTime.now();
2021-05-05 15:12:53 +02:00
// Set is_different all the existing relations that refer to the new duplicates (suggested or is_similar)
2021-04-20 11:43:17 +02:00
2021-05-05 15:12:53 +02:00
final List<OpenaireDuplicate> toSave = new ArrayList<>();
toSave.addAll(simrels);
final List<OpenaireDuplicate> toDelete = new ArrayList<>();
for (final OpenaireDuplicate r1 : simrels) {
if (!r1.getRelType().equals(SimilarityType.is_different.toString())) {
for (final OpenaireDuplicate r2 : openaireDuplicateRepository.findByOaOriginalId(r1.getOaOriginalId())) {
if (r2.getLocalId().startsWith(OpenOrgsConstants.OPENORGS_PENDING_PREFIX)) {
toDelete.add(r2);
} else if (!r1.getLocalId().equals(r2.getLocalId())) {
2021-05-06 08:46:17 +02:00
r2.setRelType(SimilarityType.is_different.toString());
2021-05-05 15:12:53 +02:00
toSave.add(r2);
}
}
}
}
2021-05-05 15:12:53 +02:00
// Save the new rels
openaireDuplicateRepository.saveAll(toSave).forEach(d -> {
2020-10-28 09:48:46 +01:00
openaireDuplicateRepository.updateCreatedByIfMissing(d.getLocalId(), d.getOaOriginalId(), user);
2020-10-19 15:12:08 +02:00
openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now);
2021-04-19 16:00:29 +02:00
});
2021-05-06 08:46:17 +02:00
log.debug("Simrels saved (contains also the fixed rels): " + toSave.size());
2021-05-05 15:12:53 +02:00
// delete rels to pending orgs
openaireDuplicateRepository.deleteAll(toDelete);
2021-05-06 08:46:17 +02:00
log.debug("Simrels related to a pending orgs deleted: " + toDelete.size());
2020-10-19 15:12:08 +02:00
2021-05-06 08:46:17 +02:00
// Updating journal
toSave.stream().collect(Collectors.groupingBy(OpenaireDuplicate::getLocalId)).forEach((id, list) -> {
final long sim = list.stream()
.filter(d -> d.getRelType().equals(SimilarityType.is_similar.toString()))
.count();
final long diff = list.stream()
2021-04-09 12:17:11 +02:00
.filter(d -> d.getRelType().equals(SimilarityType.is_different.toString()))
2021-05-06 08:46:17 +02:00
.count();
final long sugg = list.stream()
.filter(d -> d.getRelType().equals(SimilarityType.suggested.toString()))
.count();
final String message = String.format("Duplicates updated (%s similars, %s differents, %s suggested)", sim, diff, sugg);
2021-04-09 12:17:11 +02:00
2021-05-06 08:46:17 +02:00
journalEntryRepository.save(new JournalEntry(id, JournalOperations.DUPLICATES, message, user));
});;
2021-05-05 15:12:53 +02:00
}
@Scheduled(fixedRate = 300000)
public void verifyConsistency() {
2021-05-06 08:25:35 +02:00
log.debug("Verify consistency (START)");
2021-05-05 15:12:53 +02:00
// delete invalid pending orgs (without simrels)
final int n = jdbcTemplate
.update("delete from organizations where id in (select o.id from organizations o left outer join oa_duplicates d on (o.id = d.local_id) where o.status = 'suggested' and o.created_by = 'dedupWf' group by o.id having count(d.local_id) = 0)");
if (n > 0) {
log.info("Invalid pending orgs deleted: " + n);
}
2021-05-06 08:25:35 +02:00
log.debug("Verify consistency (END)");
2021-04-09 12:17:11 +02:00
2020-09-29 15:31:56 +02:00
}
2020-10-20 12:03:23 +02:00
private void makeRelations(final String orgId, final OrganizationView orgView, final boolean update) {
if (update) {
acronymRepository.deleteByOrgId(orgId);
otherNameRepository.deleteByOrgId(orgId);
otherIdentifierRepository.deleteByOrgId(orgId);
urlRepository.deleteByOrgId(orgId);
relationshipRepository.deleteById1(orgId);
relationshipRepository.deleteById2(orgId);
}
2020-07-03 12:09:22 +02:00
orgView.getAcronyms().forEach(s -> acronymRepository.save(new Acronym(orgId, s)));
orgView.getOtherNames().forEach(n -> otherNameRepository.save(new OtherName(orgId, n.getName(), n.getLang())));
orgView.getOtherIdentifiers().forEach(id -> otherIdentifierRepository.save(new OtherIdentifier(orgId, id.getId(), id.getType())));
orgView.getUrls().forEach(u -> urlRepository.save(new Url(orgId, u)));
orgView.getRelations().forEach(r -> makeRelation(orgId, r.getRelatedOrgId(), RelationType.valueOf(r.getType())));
}
@Cacheable("vocs")
2020-10-08 15:10:11 +02:00
public List<VocabularyTerm> listValuesOfVocabularyTable(final VocabularyTable table) {
final String sql = "select val as value, name as name from " + table + " order by name";
2020-10-08 15:10:11 +02:00
return jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(VocabularyTerm.class));
2020-07-03 12:09:22 +02:00
}
@Cacheable("countries_for_user")
2020-10-08 15:10:11 +02:00
public List<VocabularyTerm> listCountriesForUser(final String name) {
final String sql =
"select uc.country as value, c.name as name from user_countries uc left outer join countries c on (c.val = uc.country) where uc.email = ? order by c.name";
2020-10-08 15:10:11 +02:00
return jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(VocabularyTerm.class), name);
2020-07-03 12:09:22 +02:00
}
2022-01-31 14:35:21 +01:00
/**
 * Evicts every entry of the "vocs" and "countries_for_user" caches.
 * The eviction is performed by the annotation; the body only logs it.
 */
@CacheEvict(value = { "vocs", "countries_for_user" }, allEntries = true)
public void clearCache() {
    log.info("All caches cleaned");
}
2020-07-03 12:09:22 +02:00
@Transactional
2022-09-20 13:43:54 +02:00
public void updateUser(@RequestBody final UserView userView) {
2020-07-03 12:09:22 +02:00
final User user = userRepository.findById(userView.getEmail()).orElseThrow(() -> new RuntimeException("User not found"));
2022-09-19 14:33:22 +02:00
user.setFullname(userView.getFullname());
user.setOrganization(userView.getOrganization());
user.setReferencePerson(userView.getReferencePerson());
user.setRequestMessage(userView.getRequestMessage());
2020-07-03 12:09:22 +02:00
user.setRole(userView.getRole());
user.setValid(userView.isValid());
userRepository.save(user);
userCountryRepository.deleteByEmail(userView.getEmail());
if (userView.getCountries() != null) {
userCountryRepository
.saveAll(Arrays.stream(userView.getCountries()).map(c -> new UserCountry(userView.getEmail(), c)).collect(Collectors.toList()));
2020-07-03 12:09:22 +02:00
}
}
/**
 * Deletes a user together with the countries assigned to it.
 *
 * @param email the email identifying the user
 */
@Transactional
public void deleteUser(final String email) {
    // Country assignments go first, then the user row itself.
    userCountryRepository.deleteByEmail(email);
    userRepository.deleteById(email);
}
@Transactional
2022-09-19 14:33:22 +02:00
public void newUser(final String email,
final String fullname,
final String organization,
final String referencePerson,
final String requestMessage,
final List<String> countries) {
2020-07-03 12:09:22 +02:00
final User user = new User();
user.setEmail(email);
2022-09-19 14:33:22 +02:00
user.setFullname(fullname);
user.setOrganization(organization);
user.setReferencePerson(referencePerson);
user.setRequestMessage(requestMessage);
2020-07-03 12:09:22 +02:00
user.setRole(UserRole.PENDING.name());
user.setValid(false);
2022-09-19 14:33:22 +02:00
2020-07-03 12:09:22 +02:00
userRepository.save(user);
2022-09-19 14:33:22 +02:00
2020-07-03 12:09:22 +02:00
if (countries != null) {
userCountryRepository.saveAll(countries.stream().map(c -> new UserCountry(email, c)).collect(Collectors.toList()));
}
}
/**
 * Stores a relationship between two organizations in both directions: the given
 * type from id1 to id2 and its inverse type from id2 to id1.
 *
 * @param id1  the id of the first organization
 * @param id2  the id of the second organization
 * @param type the type of the relationship from id1 to id2
 * @return the two relationships, direct first and inverse second
 */
@Transactional
public List<Relationship> makeRelation(final String id1, final String id2, final RelationType type) {
    final Relationship direct = new Relationship(id1, id2, type.toString());
    final Relationship inverse = new Relationship(id2, id1, type.getInverse().toString());

    relationshipRepository.save(direct);
    relationshipRepository.save(inverse);

    return Arrays.asList(direct, inverse);
}
// BROWSE BY COUNTRY
public List<BrowseEntry> browseCountries() {
final String sql =
2020-10-13 14:48:04 +02:00
"select o.country as code, c.name as name, o.status as group, count(o.status) as count from organizations o left outer join countries c on (o.country = c.val) group by o.country, c.name, o.status";
return listBrowseEntries(sql);
}
// BROWSE BY COUNTRY FOR USER
public List<BrowseEntry> browseCountriesForUser(final String email) {
final String sql =
2020-10-13 14:48:04 +02:00
"select o.country as code, c.name as name, o.status as group, count(o.status) as count from user_countries uc left outer join organizations o on (uc.country = o.country) left outer join countries c on (o.country = c.val) where uc.email=? group by o.country, c.name, o.status";
return listBrowseEntries(sql, email);
}
// BROWSE BY ORG TYPE
public List<BrowseEntry> browseTypes() {
final String sql =
2020-10-13 14:48:04 +02:00
"select type as code, type as name, status as group, count(status) as count from organizations group by type, status";
return listBrowseEntries(sql);
}
// BROWSE BY ORG TYPE FOR USER
public List<BrowseEntry> browseTypesForUser(final String email) {
2020-10-13 14:48:04 +02:00
final String sql = "select o.type as code, o.type as name,"
+ "o.status as group, count(o.status) as count "
+ "from organizations o "
+ "left outer join user_countries uc on (uc.country = o.country) "
+ "where uc.email=? "
2020-10-13 14:48:04 +02:00
+ "group by o.type, o.status";
return listBrowseEntries(sql, email);
}
private List<BrowseEntry> listBrowseEntries(final String sql, final Object... params) {
final Map<String, BrowseEntry> map = new HashMap<>();
for (final TempBrowseEntry t : jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(TempBrowseEntry.class), params)) {
if (StringUtils.isNotBlank(t.getCode())) {
if (!map.containsKey(t.getCode())) {
final BrowseEntry e = new BrowseEntry();
e.setCode(t.getCode());
e.setName(t.getName());
map.put(t.getCode(), e);
}
map.get(t.getCode()).getValues().put(t.getGroup(), t.getCount());
}
}
2020-10-13 14:48:04 +02:00
return map.values().stream().sorted((o1, o2) -> StringUtils.compare(o1.getName(), o2.getName())).collect(Collectors.toList());
}
public List<OrganizationConflict> listConflictsForId(final String id) {
final String sql =
2020-10-30 10:05:23 +01:00
"select o.id, o.name, o.type, o.city, o.country from oa_conflicts c left outer join organizations o on (c.id2 = o.id) where o.id is not null and c.id1 = ? and c.reltype = 'suggested'";
return jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(OrganizationConflict.class), id);
}
2020-09-29 11:48:45 +02:00
@Transactional
2021-04-22 15:42:35 +02:00
public void importDedupEvents() throws Exception {
2022-04-13 14:45:56 +02:00
jdbcTemplate.update("CALL import_dedup_events();");
2021-04-22 15:42:35 +02:00
// verifyConflictGroups(true);
}
2021-01-15 12:21:10 +01:00
/**
 * Refreshes the fulltext index by calling the database function refresh_index_search().
 * Failures are logged and not propagated to the caller.
 */
@Transactional
public void updateFulltextIndex() {
    try {
        log.info("Updating Fulltext Index...");
        final String refresh = "SELECT refresh_index_search()";
        jdbcTemplate.queryForList(refresh);
        log.info("...done");
    } catch (final Exception e) {
        // Best effort: the error is only logged.
        log.error("Error updating Fulltext Index", e);
    }
}
2020-10-19 15:12:08 +02:00
@Transactional
2020-10-30 10:05:23 +01:00
public String fixConflictSimilars(final List<String> similarIds, final String user) {
2020-10-19 15:12:08 +02:00
final List<OrganizationView> views =
2020-10-29 13:18:11 +01:00
similarIds.stream().map(organizationViewRepository::findById).filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList());
2020-10-19 15:12:08 +02:00
2022-10-11 10:40:31 +02:00
final List<OrganizationView> persistents = views.stream().filter(v -> v.getPersistent()).collect(Collectors.toList());
2022-10-11 12:36:02 +02:00
final OrganizationView masterOrg = new OrganizationView();
2022-10-11 10:40:31 +02:00
if (persistents.size() > 1) {
throw new RuntimeException("Too many persintent organizations");
2022-10-11 11:33:45 +02:00
} else if (persistents.size() == 1) {
2022-10-11 10:40:31 +02:00
backupOrg(persistents.get(0), user);
2022-10-11 12:36:02 +02:00
masterOrg.setId(persistents.get(0).getId());
masterOrg.setStatus(OrganizationStatus.approved.toString());
2022-10-11 10:40:31 +02:00
} else {
masterOrg.setId(null);
masterOrg.setStatus(null);
}
2022-10-11 12:36:02 +02:00
return fixConflicts(masterOrg, views, user);
2022-10-11 10:40:31 +02:00
}
private String backupOrg(final OrganizationView org, final String user) {
final String origId = org.getId();
final String backupId = origId + "::" + OffsetDateTime.now().toEpochSecond();
2022-10-11 12:36:02 +02:00
organizationRepository.prepareOrgWithId(backupId);
2022-10-11 10:40:31 +02:00
try {
final OrganizationView backupOrg = (OrganizationView) BeanUtils.cloneBean(org);
backupOrg.setId(backupId);
insertOrUpdateOrganization(backupOrg, user, false);
2022-10-11 12:36:02 +02:00
organizationRepository.updateStatus(backupId, OrganizationStatus.hidden.toString());
2022-10-11 10:40:31 +02:00
journalEntryRepository
.save(new JournalEntry(origId, JournalOperations.BACKUP_ORG, "Saved a backup copy: " + backupId, user));
journalEntryRepository
.save(new JournalEntry(backupId, JournalOperations.BACKUP_ORG, "Saved a backup copy of " + origId, user));
return backupId;
} catch (final Exception e) {
log.error("Error performing the backup of " + origId, e);
throw new RuntimeException("Error performing the backup of " + origId, e);
}
}
private String fixConflicts(final OrganizationView masterOrg, final List<OrganizationView> orgs, final String user) {
final OffsetDateTime now = OffsetDateTime.now();
final String finalMessage = (masterOrg.getId() == null ? "New org created merging: " : "Merging in persistent org: ") +
orgs.stream()
.map(OrganizationView::getId)
.collect(Collectors.joining(", "));
masterOrg.setName(findFirstString(orgs, OrganizationView::getName));
masterOrg.setType(findFirstString(orgs, OrganizationView::getType));
masterOrg.setLat(findFirstNumber(orgs, OrganizationView::getLat));
masterOrg.setLng(findFirstNumber(orgs, OrganizationView::getLng));
masterOrg.setCity(findFirstString(orgs, OrganizationView::getCity));
masterOrg.setCountry(findFirstString(orgs, OrganizationView::getCountry));
masterOrg.setOtherIdentifiers(findAll(orgs, OrganizationView::getOtherIdentifiers));
masterOrg.setOtherNames(findAll(orgs, OrganizationView::getOtherNames));
masterOrg.setAcronyms(findAll(orgs, OrganizationView::getAcronyms));
masterOrg.setUrls(findAll(orgs, OrganizationView::getUrls));
masterOrg.setRelations(findAll(orgs, OrganizationView::getRelations, r -> !r.getType().equals(RelationType.Merged_In.toString())
&& !r.getType().equals(RelationType.Merged_In.toString())));
masterOrg.getOtherNames()
.addAll(orgs.stream()
2020-10-20 16:13:14 +02:00
.map(OrganizationView::getName)
.filter(StringUtils::isNotBlank)
2022-10-11 10:40:31 +02:00
.filter(s -> StringUtils.equalsIgnoreCase(s, masterOrg.getName()))
2020-10-20 16:13:14 +02:00
.map(s -> new eu.dnetlib.organizations.model.view.OtherName(s, "UNKNOWN"))
.collect(Collectors.toList()));
2022-10-11 10:40:31 +02:00
final String masterId = insertOrUpdateOrganization(masterOrg, user, false);
2020-10-19 15:12:08 +02:00
// I hide the merged organizations
2022-10-11 10:40:31 +02:00
orgs.stream()
.map(OrganizationView::getId)
.filter(id -> !id.equals(masterId))
.forEach(id -> {
hideConflictOrgs(masterId, id);
journalEntryRepository
2022-10-11 12:36:02 +02:00
.save(new JournalEntry(id, JournalOperations.FIX_CONFLICT, "The org has been hidded and merged in " + masterId, user));
2022-10-11 10:40:31 +02:00
});
2020-10-19 15:12:08 +02:00
2020-10-29 13:18:11 +01:00
// I reassign the duplicates to the new org
2022-10-11 10:40:31 +02:00
final List<OpenaireDuplicate> newDuplicates = orgs.stream()
.map(OrganizationView::getId)
2020-10-19 15:12:08 +02:00
.map(openaireDuplicateRepository::findByLocalId)
.flatMap(l -> l.stream())
2020-10-29 13:45:26 +01:00
.map(d -> new OpenaireDuplicate(masterId, d.getOaOriginalId(), d.getRelType(), d.getOaCollectedFrom()))
2020-10-19 15:12:08 +02:00
.collect(Collectors.toList());
2020-10-29 13:18:11 +01:00
openaireDuplicateRepository.saveAll(newDuplicates);
2020-10-29 13:32:09 +01:00
newDuplicates.forEach(d -> {
openaireDuplicateRepository.updateCreatedByIfMissing(d.getLocalId(), d.getOaOriginalId(), user);
openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now);
});
2020-10-20 16:13:14 +02:00
2022-10-11 10:40:31 +02:00
orgs.forEach(org -> {
final String similarId = org.getId();
2020-10-29 13:18:11 +01:00
openaireConflictRepository.updateMultipleStatusAndResetGroup(similarId, SimilarityType.is_different.toString(), user, now);
2022-10-11 10:40:31 +02:00
});
2020-10-29 13:18:11 +01:00
2022-10-11 10:40:31 +02:00
for (int i = 0; i < orgs.size(); i++) {
for (int j = i + 1; j < orgs.size(); j++) {
openaireConflictRepository
.updateStatusAndResetGroup(orgs.get(i).getId(), orgs.get(j).getId(), SimilarityType.is_similar.toString(), user, now);
2020-10-20 16:13:14 +02:00
}
}
2021-04-09 12:17:11 +02:00
journalEntryRepository
2022-10-11 10:40:31 +02:00
.save(new JournalEntry(masterId, JournalOperations.FIX_CONFLICT, finalMessage, user));
2021-04-09 12:17:11 +02:00
2020-10-19 15:12:08 +02:00
return masterId;
}
2020-10-30 10:05:23 +01:00
@Transactional
public void fixConflictDifferents(final List<String> differentsIds, final String user) {
final OffsetDateTime now = OffsetDateTime.now();
2021-04-09 12:17:11 +02:00
final String message = "Mark the following orgs as different: " + StringUtils.join(differentsIds, ", ");
2020-10-30 10:05:23 +01:00
for (int i = 0; i < differentsIds.size(); i++) {
for (int j = i + 1; j < differentsIds.size(); j++) {
openaireConflictRepository
.updateStatusAndResetGroup(differentsIds.get(i), differentsIds.get(j), SimilarityType.is_different.toString(), user, now);
}
2021-04-09 12:17:11 +02:00
journalEntryRepository.save(new JournalEntry(differentsIds.get(i), JournalOperations.NO_CONFLICT, message, user));
2020-10-30 10:05:23 +01:00
}
}
2020-11-12 15:47:41 +01:00
@Transactional
public Optional<User> findUser(final String email) {
2022-09-19 14:33:22 +02:00
return userRepository.findById(email);
}
@Transactional
public void updateUserDetails(final String email, final String fullname, final String organization) {
userRepository.updateDetails(email, fullname, organization, OffsetDateTime.now());
2020-11-12 15:47:41 +01:00
}
2020-10-19 15:12:08 +02:00
/**
 * Returns the first non-blank value extracted from the views, in list order.
 *
 * @param views  the views to scan
 * @param mapper extracts the value from a view
 * @return the first non-blank value, or null when there is none
 */
private String findFirstString(final List<OrganizationView> views, final Function<OrganizationView, String> mapper) {
    return views.stream()
        .map(mapper)
        .filter(StringUtils::isNotBlank)
        .findFirst()
        .orElse(null);
}
/**
 * Returns the first value extracted from the views that is neither null nor zero, in list order.
 *
 * @param views  the views to scan
 * @param mapper extracts the value from a view
 * @return the first usable value, or 0.0 when there is none
 */
private Double findFirstNumber(final List<OrganizationView> views, final Function<OrganizationView, Double> mapper) {
    return views.stream()
        .map(mapper)
        .filter(value -> value != null && value != 0)
        .findFirst()
        .orElse(0.0);
}
/**
 * Collects the union of the sets extracted from the views, keeping the order of first appearance.
 *
 * @param views  the views to scan
 * @param mapper extracts a set from a view
 * @return a new insertion-ordered set with all the elements
 */
private <T> Set<T> findAll(final List<OrganizationView> views, final Function<OrganizationView, Set<T>> mapper) {
    return views.stream()
        .map(mapper)
        .flatMap(Set::stream)
        .collect(Collectors.toCollection(LinkedHashSet::new));
}
2022-10-11 10:40:31 +02:00
/**
 * Collects the union of the sets extracted from the views, keeping only the elements
 * accepted by the filter and preserving the order of first appearance.
 *
 * @param views  the views to scan
 * @param mapper extracts a set from a view
 * @param filter selects the elements to keep
 * @return a new insertion-ordered set with the accepted elements
 */
private <T> Set<T> findAll(final List<OrganizationView> views, final Function<OrganizationView, Set<T>> mapper, final Predicate<T> filter) {
    return views.stream()
        .map(mapper)
        .flatMap(Set::stream)
        .filter(filter)
        .collect(Collectors.toCollection(LinkedHashSet::new));
}
2020-10-19 15:12:08 +02:00
private List<Relationship> hideConflictOrgs(final String masterId, final String otherId) {
2020-10-07 17:04:29 +02:00
organizationRepository.updateStatus(otherId, OrganizationStatus.hidden.toString());
openaireConflictRepository.findById(new OpenaireConflictPK(masterId, otherId)).ifPresent(openaireConflictRepository::delete);
openaireConflictRepository.findById(new OpenaireConflictPK(otherId, masterId)).ifPresent(openaireConflictRepository::delete);
return makeRelation(masterId, otherId, RelationType.Merges);
}
2021-09-29 16:05:16 +02:00
/**
 * Lists the distinct oa_country values of tmp_dedup_events that have no match in the countries table.
 *
 * @return the unmatched country values, ordered
 */
public List<String> invalidCountriesInSuggestions() {
    return jdbcTemplate.queryForList(
        " select distinct t.oa_country from tmp_dedup_events t left outer join countries c on (t.oa_country = c.val) where c.val is null order by t.oa_country",
        String.class);
}
2022-09-26 10:29:15 +02:00
/**
 * Lists all the persistent organizations.
 *
 * @return every row of the persistent organization view
 */
public Iterable<PersistentOrganizationView> listPersistentOrgs() {
    final Iterable<PersistentOrganizationView> all = persistentOrganizationViewRepository.findAll();
    return all;
}
public String addPersistentOrgs(final String id) {
final boolean valid;
final String ooid;
if (id.length() == 46) {
final Optional<OrganizationView> orgView = organizationViewRepository.findByOpenaireId(id);
valid = orgView.map(OrganizationView::getStatus)
.filter(s -> s.equals(OrganizationStatus.approved.toString()))
.isPresent();
ooid = orgView.get().getId();
} else {
valid = organizationRepository.findById(id)
.map(Organization::getStatus)
.filter(s -> s.equals(OrganizationStatus.approved.toString()))
.isPresent();
ooid = id;
}
2022-09-26 10:29:15 +02:00
if (valid) {
persistentOrganizationRepository.save(new PersistentOrganization(ooid));
return ooid;
2022-09-26 10:29:15 +02:00
} else {
throw new RuntimeException("The ID does not refer to an approved Organization");
}
}
/**
 * Removes the persistent mark identified by the given id.
 *
 * @param id the id of the persistent organization entry to delete
 */
public void deletePersistentOrgs(final String id) {
    persistentOrganizationRepository.deleteById(id);
}
2020-07-03 12:09:22 +02:00
}