dnet-applications/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/utils/DatabaseUtils.java

444 lines
18 KiB
Java
Raw Normal View History

2020-07-03 12:09:22 +02:00
package eu.dnetlib.organizations.utils;
import java.time.OffsetDateTime;
2020-10-21 12:27:14 +02:00
import java.util.ArrayList;
2020-07-03 12:09:22 +02:00
import java.util.Arrays;
import java.util.HashMap;
2020-10-19 15:12:08 +02:00
import java.util.LinkedHashSet;
2020-07-03 12:09:22 +02:00
import java.util.List;
import java.util.Map;
2020-10-19 15:12:08 +02:00
import java.util.Objects;
import java.util.Optional;
2020-07-03 12:09:22 +02:00
import java.util.Set;
2020-10-19 15:12:08 +02:00
import java.util.function.Function;
2020-07-03 12:09:22 +02:00
import java.util.stream.Collectors;
import javax.transaction.Transactional;
2020-10-14 16:28:00 +02:00
import org.apache.commons.io.IOUtils;
2020-10-05 12:16:49 +02:00
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
2020-07-03 12:09:22 +02:00
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.jdbc.core.BeanPropertyRowMapper;
2020-07-03 12:09:22 +02:00
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestBody;
import eu.dnetlib.organizations.controller.UserRole;
import eu.dnetlib.organizations.model.Acronym;
import eu.dnetlib.organizations.model.OpenaireConflictPK;
2020-09-29 15:31:56 +02:00
import eu.dnetlib.organizations.model.OpenaireDuplicate;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.model.Organization;
import eu.dnetlib.organizations.model.OtherIdentifier;
import eu.dnetlib.organizations.model.OtherName;
import eu.dnetlib.organizations.model.Relationship;
import eu.dnetlib.organizations.model.Url;
import eu.dnetlib.organizations.model.User;
import eu.dnetlib.organizations.model.UserCountry;
import eu.dnetlib.organizations.model.utils.BrowseEntry;
import eu.dnetlib.organizations.model.utils.OrganizationConflict;
2020-10-13 14:48:04 +02:00
import eu.dnetlib.organizations.model.utils.TempBrowseEntry;
2020-10-08 15:10:11 +02:00
import eu.dnetlib.organizations.model.utils.VocabularyTerm;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.model.view.OrganizationView;
import eu.dnetlib.organizations.model.view.UserView;
import eu.dnetlib.organizations.repository.AcronymRepository;
import eu.dnetlib.organizations.repository.OpenaireConflictRepository;
2020-09-29 15:31:56 +02:00
import eu.dnetlib.organizations.repository.OpenaireDuplicateRepository;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.repository.OrganizationRepository;
import eu.dnetlib.organizations.repository.OtherIdentifierRepository;
import eu.dnetlib.organizations.repository.OtherNameRepository;
import eu.dnetlib.organizations.repository.RelationshipRepository;
import eu.dnetlib.organizations.repository.UrlRepository;
import eu.dnetlib.organizations.repository.UserCountryRepository;
import eu.dnetlib.organizations.repository.UserRepository;
2020-10-19 15:12:08 +02:00
import eu.dnetlib.organizations.repository.readonly.OrganizationViewRepository;
2020-07-03 12:09:22 +02:00
@Component
public class DatabaseUtils {
@Autowired
private AcronymRepository acronymRepository;
@Autowired
private OrganizationRepository organizationRepository;
@Autowired
private OtherIdentifierRepository otherIdentifierRepository;
@Autowired
private OtherNameRepository otherNameRepository;
@Autowired
private UrlRepository urlRepository;
@Autowired
private RelationshipRepository relationshipRepository;
@Autowired
private UserRepository userRepository;
@Autowired
private UserCountryRepository userCountryRepository;
@Autowired
private OpenaireConflictRepository openaireConflictRepository;
@Autowired
2020-09-29 15:31:56 +02:00
private OpenaireDuplicateRepository openaireDuplicateRepository;
2020-10-19 15:12:08 +02:00
@Autowired
private OrganizationViewRepository organizationViewRepository;
2020-09-29 15:31:56 +02:00
@Autowired
2020-07-03 12:09:22 +02:00
private JdbcTemplate jdbcTemplate;
private static final Log log = LogFactory.getLog(DatabaseUtils.class);
2020-07-03 12:09:22 +02:00
/**
 * Vocabulary tables that can be listed through {@code listValuesOfVocabularyTable}.
 * The constant name is concatenated verbatim into the SQL query as the table name.
 */
public enum VocabularyTable {
    languages, countries, org_types, id_types, rel_types, simrel_types
}
@Transactional
2020-10-19 15:12:08 +02:00
public String insertOrUpdateOrganization(final OrganizationView orgView, final String user, final boolean isSimpleUser) {
2020-10-20 16:13:14 +02:00
final String oldStatus = orgView.getId() != null ? organizationRepository.findById(orgView.getId())
2020-10-19 15:12:08 +02:00
.map(Organization::getStatus)
2020-10-20 16:13:14 +02:00
.orElse(null) : null;
2020-10-19 15:12:08 +02:00
final boolean alreadyApproved = StringUtils.equals(oldStatus, OrganizationStatus.approved.toString());
2020-10-05 12:16:49 +02:00
2020-10-19 15:12:08 +02:00
final String newStatus;
if (!isSimpleUser) { // IS ADMIN
newStatus = OrganizationStatus.approved.toString();
} else if (isSimpleUser && oldStatus == null) {
newStatus = OrganizationStatus.suggested.toString();
} else if (isSimpleUser && alreadyApproved) {
newStatus = OrganizationStatus.approved.toString();
} else {
throw new RuntimeException("User not authorized");
}
2020-10-20 12:03:23 +02:00
final String oldId = StringUtils.isNotBlank(orgView.getId()) ? new String(orgView.getId()) : null;
if (oldId == null || !oldId.startsWith(OpenOrgsConstants.OPENORGS_PREFIX)) {
2020-10-07 17:04:29 +02:00
orgView.setId(null);
2020-10-05 12:16:49 +02:00
}
2020-10-07 17:04:29 +02:00
final Organization org = new Organization(orgView.getId(),
orgView.getName(),
orgView.getType(),
orgView.getLat(), orgView.getLng(),
orgView.getCity(), orgView.getCountry(),
2020-10-19 15:12:08 +02:00
newStatus);
2020-07-03 12:09:22 +02:00
2020-10-07 17:04:29 +02:00
final String newId = organizationRepository.save(org).getId();
2020-07-03 12:09:22 +02:00
2020-10-20 12:03:23 +02:00
final OffsetDateTime now = OffsetDateTime.now();
2020-07-03 12:09:22 +02:00
2020-10-20 12:03:23 +02:00
organizationRepository.updateModificationDate(newId, user, now);
2020-10-07 17:04:29 +02:00
2020-10-20 12:03:23 +02:00
if (StringUtils.equals(newId, oldId)) {
makeRelations(newId, orgView, true);
2020-07-03 12:09:22 +02:00
} else {
2020-10-20 12:03:23 +02:00
organizationRepository.updateCreationDate(newId, user, now);
makeRelations(newId, orgView, false);
if (oldId != null) {
2020-10-21 12:27:14 +02:00
final List<OpenaireDuplicate> dups = new ArrayList<>();
2020-10-29 13:45:26 +01:00
dups.add(new OpenaireDuplicate(newId, oldId, SimilarityType.is_similar.toString(), ""));
2020-10-21 12:27:14 +02:00
dups.addAll(openaireDuplicateRepository.findByLocalId(oldId)
.stream()
2020-10-29 13:45:26 +01:00
.map(d -> new OpenaireDuplicate(newId, d.getOaOriginalId(), SimilarityType.suggested.toString(), d.getOaCollectedFrom()))
2020-10-21 12:27:14 +02:00
.collect(Collectors.toList()));
openaireDuplicateRepository.saveAll(dups);
2020-10-28 09:48:46 +01:00
dups.forEach(d -> {
openaireDuplicateRepository.updateCreatedByIfMissing(d.getLocalId(), d.getOaOriginalId(), user);
openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now);
});
2020-10-21 12:27:14 +02:00
2020-10-20 12:03:23 +02:00
organizationRepository.updateStatus(oldId, OrganizationStatus.duplicate.toString());
organizationRepository.updateModificationDate(oldId, user, now);
}
2020-07-03 12:09:22 +02:00
}
2020-10-20 12:03:23 +02:00
return newId;
2020-07-03 12:09:22 +02:00
}
2020-09-29 15:31:56 +02:00
@Transactional
2020-10-19 15:12:08 +02:00
public void saveDuplicates(final List<OpenaireDuplicate> simrels, final String user) {
2020-09-29 15:31:56 +02:00
final OffsetDateTime now = OffsetDateTime.now();
final List<OpenaireDuplicate> list = openaireDuplicateRepository.saveAll(simrels);
2020-10-19 15:12:08 +02:00
list.forEach(d -> {
2020-10-28 09:48:46 +01:00
openaireDuplicateRepository.updateCreatedByIfMissing(d.getLocalId(), d.getOaOriginalId(), user);
2020-10-19 15:12:08 +02:00
openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now);
2020-10-21 12:27:14 +02:00
if (d.getRelType().equals(SimilarityType.is_different.toString())) {
2020-10-20 12:03:23 +02:00
updateStatus(d.getOaOriginalId(), OrganizationStatus.suggested, user, now);
2020-10-21 12:27:14 +02:00
} else {
updateStatus(d.getOaOriginalId(), OrganizationStatus.duplicate, user, now);
2020-10-19 15:12:08 +02:00
}
});
2020-09-29 15:31:56 +02:00
}
2020-10-20 12:03:23 +02:00
private void makeRelations(final String orgId, final OrganizationView orgView, final boolean update) {
if (update) {
acronymRepository.deleteByOrgId(orgId);
otherNameRepository.deleteByOrgId(orgId);
otherIdentifierRepository.deleteByOrgId(orgId);
urlRepository.deleteByOrgId(orgId);
relationshipRepository.deleteById1(orgId);
relationshipRepository.deleteById2(orgId);
}
2020-07-03 12:09:22 +02:00
orgView.getAcronyms().forEach(s -> acronymRepository.save(new Acronym(orgId, s)));
orgView.getOtherNames().forEach(n -> otherNameRepository.save(new OtherName(orgId, n.getName(), n.getLang())));
orgView.getOtherIdentifiers().forEach(id -> otherIdentifierRepository.save(new OtherIdentifier(orgId, id.getId(), id.getType())));
orgView.getUrls().forEach(u -> urlRepository.save(new Url(orgId, u)));
orgView.getRelations().forEach(r -> makeRelation(orgId, r.getRelatedOrgId(), RelationType.valueOf(r.getType())));
}
@Cacheable("vocs")
2020-10-08 15:10:11 +02:00
public List<VocabularyTerm> listValuesOfVocabularyTable(final VocabularyTable table) {
final String sql = "select val as value, name as name from " + table + " order by name";
2020-10-08 15:10:11 +02:00
return jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(VocabularyTerm.class));
2020-07-03 12:09:22 +02:00
}
@Cacheable("countries_for_user")
2020-10-08 15:10:11 +02:00
public List<VocabularyTerm> listCountriesForUser(final String name) {
final String sql =
"select uc.country as value, c.name as name from user_countries uc left outer join countries c on (c.val = uc.country) where uc.email = ? order by c.name";
2020-10-08 15:10:11 +02:00
return jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(VocabularyTerm.class), name);
2020-07-03 12:09:22 +02:00
}
@Transactional
public void saveUser(@RequestBody final UserView userView) {
final User user = userRepository.findById(userView.getEmail()).orElseThrow(() -> new RuntimeException("User not found"));
user.setRole(userView.getRole());
user.setValid(userView.isValid());
userRepository.save(user);
userCountryRepository.deleteByEmail(userView.getEmail());
if (userView.getCountries() != null) {
userCountryRepository
.saveAll(Arrays.stream(userView.getCountries()).map(c -> new UserCountry(userView.getEmail(), c)).collect(Collectors.toList()));
2020-07-03 12:09:22 +02:00
}
}
/**
 * Deletes a user: first its country associations, then the user record itself.
 *
 * @param email the email identifying the user
 */
@Transactional
public void deleteUser(final String email) {
    // The associations are removed before the user they refer to.
    userCountryRepository.deleteByEmail(email);
    userRepository.deleteById(email);
}
/**
 * Registers a new user with role {@code PENDING} and not yet valid, optionally associated to countries.
 *
 * @param email the email identifying the new user
 * @param countries the countries to associate to the user; {@code null} means no association
 */
@Transactional
public void newUser(final String email, final List<String> countries) {
    final User pending = new User();
    pending.setEmail(email);
    pending.setRole(UserRole.PENDING.name());
    pending.setValid(false);
    userRepository.save(pending);

    if (countries == null) {
        return;
    }

    final List<UserCountry> associations = countries.stream()
            .map(country -> new UserCountry(email, country))
            .collect(Collectors.toList());
    userCountryRepository.saveAll(associations);
}
/**
 * Stores a relationship between two organizations together with its inverse.
 *
 * @param id1 the id of the first organization
 * @param id2 the id of the second organization
 * @param type the type of the relation from {@code id1} to {@code id2}
 * @return the two saved relationships: the direct one followed by the inverse one
 */
@Transactional
public List<Relationship> makeRelation(final String id1, final String id2, final RelationType type) {
    final Relationship direct = new Relationship(id1, id2, type.toString());
    final Relationship inverse = new Relationship(id2, id1, type.getInverse().toString());

    relationshipRepository.save(direct);
    relationshipRepository.save(inverse);

    return Arrays.asList(direct, inverse);
}
// BROWSE BY COUNTRY
public List<BrowseEntry> browseCountries() {
final String sql =
2020-10-13 14:48:04 +02:00
"select o.country as code, c.name as name, o.status as group, count(o.status) as count from organizations o left outer join countries c on (o.country = c.val) group by o.country, c.name, o.status";
return listBrowseEntries(sql);
}
// BROWSE BY COUNTRY FOR USER
public List<BrowseEntry> browseCountriesForUser(final String email) {
final String sql =
2020-10-13 14:48:04 +02:00
"select o.country as code, c.name as name, o.status as group, count(o.status) as count from user_countries uc left outer join organizations o on (uc.country = o.country) left outer join countries c on (o.country = c.val) where uc.email=? group by o.country, c.name, o.status";
return listBrowseEntries(sql, email);
}
// BROWSE BY ORG TYPE
public List<BrowseEntry> browseTypes() {
final String sql =
2020-10-13 14:48:04 +02:00
"select type as code, type as name, status as group, count(status) as count from organizations group by type, status";
return listBrowseEntries(sql);
}
// BROWSE BY ORG TYPE FOR USER
public List<BrowseEntry> browseTypesForUser(final String email) {
2020-10-13 14:48:04 +02:00
final String sql = "select o.type as code, o.type as name,"
+ "o.status as group, count(o.status) as count "
+ "from organizations o "
+ "left outer join user_countries uc on (uc.country = o.country) "
+ "where uc.email=? "
2020-10-13 14:48:04 +02:00
+ "group by o.type, o.status";
return listBrowseEntries(sql, email);
}
private List<BrowseEntry> listBrowseEntries(final String sql, final Object... params) {
final Map<String, BrowseEntry> map = new HashMap<>();
for (final TempBrowseEntry t : jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(TempBrowseEntry.class), params)) {
if (StringUtils.isNotBlank(t.getCode())) {
if (!map.containsKey(t.getCode())) {
final BrowseEntry e = new BrowseEntry();
e.setCode(t.getCode());
e.setName(t.getName());
map.put(t.getCode(), e);
}
map.get(t.getCode()).getValues().put(t.getGroup(), t.getCount());
}
}
2020-10-13 14:48:04 +02:00
return map.values().stream().sorted((o1, o2) -> StringUtils.compare(o1.getName(), o2.getName())).collect(Collectors.toList());
}
public List<OrganizationConflict> listConflictsForId(final String id) {
final String sql =
2020-10-30 10:05:23 +01:00
"select o.id, o.name, o.type, o.city, o.country from oa_conflicts c left outer join organizations o on (c.id2 = o.id) where o.id is not null and c.id1 = ? and c.reltype = 'suggested'";
return jdbcTemplate.query(sql, new BeanPropertyRowMapper<>(OrganizationConflict.class), id);
}
2020-09-29 11:48:45 +02:00
@Transactional
public void importDedupEvents() {
try {
2020-10-14 16:28:00 +02:00
log.info("Importing conflicts and duplicates...");
jdbcTemplate.update(IOUtils.toString(getClass().getResourceAsStream("/sql/importDedupEvents.sql")));
log.info("...done");
// verifyConflictGroups(true);
} catch (final Exception e) {
2020-09-29 11:34:31 +02:00
log.error("Error importing conflicts and duplicates", e);
}
}
2021-01-15 12:21:10 +01:00
/**
 * Refreshes the fulltext index by invoking the database function {@code refresh_index_search()}.
 *
 * <p>Any failure is logged and not propagated to the caller.
 */
@Transactional
public void updateFulltextIndex() {
    try {
        log.info("Updating Fulltext Index...");
        final String query = "SELECT refresh_index_search()";
        jdbcTemplate.queryForList(query);
        log.info("...done");
    } catch (final Exception e) {
        log.error("Error updating Fulltext Index", e);
    }
}
2020-10-20 12:03:23 +02:00
private void updateStatus(final String id, final OrganizationStatus status, final String user, final OffsetDateTime now) {
2020-10-19 15:12:08 +02:00
organizationRepository.updateStatus(id, status.toString());
2020-10-20 12:03:23 +02:00
organizationRepository.updateModificationDate(id, user, now);
2020-10-19 15:12:08 +02:00
}
@Transactional
2020-10-30 10:05:23 +01:00
public String fixConflictSimilars(final List<String> similarIds, final String user) {
2020-10-19 15:12:08 +02:00
2020-10-29 13:32:09 +01:00
final OffsetDateTime now = OffsetDateTime.now();
2020-10-19 15:12:08 +02:00
final List<OrganizationView> views =
2020-10-29 13:18:11 +01:00
similarIds.stream().map(organizationViewRepository::findById).filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList());
2020-10-19 15:12:08 +02:00
// I create a new org
final OrganizationView newOrg = new OrganizationView();
newOrg.setId(null);
newOrg.setStatus(null);
newOrg.setName(findFirstString(views, OrganizationView::getName));
newOrg.setType(findFirstString(views, OrganizationView::getType));
newOrg.setLat(findFirstNumber(views, OrganizationView::getLat));
newOrg.setLng(findFirstNumber(views, OrganizationView::getLng));
newOrg.setCity(findFirstString(views, OrganizationView::getCity));
newOrg.setCountry(findFirstString(views, OrganizationView::getCountry));
newOrg.setOtherIdentifiers(findAll(views, OrganizationView::getOtherIdentifiers));
newOrg.setOtherNames(findAll(views, OrganizationView::getOtherNames));
newOrg.setAcronyms(findAll(views, OrganizationView::getAcronyms));
newOrg.setUrls(findAll(views, OrganizationView::getUrls));
newOrg.setRelations(findAll(views, OrganizationView::getRelations));
2020-10-20 16:13:14 +02:00
newOrg.getOtherNames()
.addAll(views.stream()
.map(OrganizationView::getName)
.filter(StringUtils::isNotBlank)
.filter(s -> StringUtils.equalsIgnoreCase(s, newOrg.getName()))
.map(s -> new eu.dnetlib.organizations.model.view.OtherName(s, "UNKNOWN"))
.collect(Collectors.toList()));
2020-10-19 15:12:08 +02:00
final String masterId = insertOrUpdateOrganization(newOrg, user, false);
// I hide the merged organizations
2020-10-29 13:18:11 +01:00
similarIds.forEach(id -> hideConflictOrgs(masterId, id));
2020-10-19 15:12:08 +02:00
2020-10-29 13:18:11 +01:00
// I reassign the duplicates to the new org
final List<OpenaireDuplicate> newDuplicates = similarIds.stream()
2020-10-19 15:12:08 +02:00
.map(openaireDuplicateRepository::findByLocalId)
.flatMap(l -> l.stream())
2020-10-29 13:45:26 +01:00
.map(d -> new OpenaireDuplicate(masterId, d.getOaOriginalId(), d.getRelType(), d.getOaCollectedFrom()))
2020-10-19 15:12:08 +02:00
.collect(Collectors.toList());
2020-10-29 13:18:11 +01:00
openaireDuplicateRepository.saveAll(newDuplicates);
2020-10-29 13:32:09 +01:00
newDuplicates.forEach(d -> {
openaireDuplicateRepository.updateCreatedByIfMissing(d.getLocalId(), d.getOaOriginalId(), user);
openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now);
});
2020-10-20 16:13:14 +02:00
2020-10-29 13:18:11 +01:00
for (final String similarId : similarIds) {
openaireConflictRepository.updateMultipleStatusAndResetGroup(similarId, SimilarityType.is_different.toString(), user, now);
}
for (int i = 0; i < similarIds.size(); i++) {
for (int j = i + 1; j < similarIds.size(); j++) {
openaireConflictRepository.updateStatusAndResetGroup(similarIds.get(i), similarIds.get(j), SimilarityType.is_similar.toString(), user, now);
2020-10-20 16:13:14 +02:00
}
}
2020-10-19 15:12:08 +02:00
return masterId;
}
2020-10-30 10:05:23 +01:00
/**
 * Resolves a conflict by declaring a group of organizations as different from each other:
 * every unordered pair of the given ids is updated to {@code is_different}.
 *
 * @param differentsIds the ids of the organizations to mark as different
 * @param user the user performing the operation
 */
@Transactional
public void fixConflictDifferents(final List<String> differentsIds, final String user) {
    final OffsetDateTime now = OffsetDateTime.now();
    final String relType = SimilarityType.is_different.toString();
    final int size = differentsIds.size();

    for (int first = 0; first < size; first++) {
        for (int second = first + 1; second < size; second++) {
            openaireConflictRepository.updateStatusAndResetGroup(differentsIds.get(first), differentsIds.get(second), relType, user, now);
        }
    }
}
2020-11-12 15:47:41 +01:00
/**
 * Looks up a user by email and, when found, records the current time as its last access.
 *
 * @param email the email identifying the user
 * @return the user, or an empty {@link Optional} if no user has the given email
 */
@Transactional
public Optional<User> findUser(final String email) {
    final Optional<User> found = userRepository.findById(email);
    found.ifPresent(u -> userRepository.updateLastAccess(email, OffsetDateTime.now()));
    return found;
}
2020-10-19 15:12:08 +02:00
/**
 * Returns the first non-blank value extracted from the given views, in list order.
 *
 * @param views the views to inspect
 * @param mapper extracts the candidate value from a view
 * @return the first non-blank value, or {@code null} if there is none
 */
private String findFirstString(final List<OrganizationView> views, final Function<OrganizationView, String> mapper) {
    for (final OrganizationView view : views) {
        final String candidate = mapper.apply(view);
        if (StringUtils.isNotBlank(candidate)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Returns the first value, in list order, that is neither {@code null} nor zero.
 *
 * @param views the views to inspect
 * @param mapper extracts the candidate value from a view
 * @return the first non-null, non-zero value, or {@code 0.0} if there is none
 */
private Double findFirstNumber(final List<OrganizationView> views, final Function<OrganizationView, Double> mapper) {
    for (final OrganizationView view : views) {
        final Double candidate = mapper.apply(view);
        if (candidate != null && candidate != 0) {
            return candidate;
        }
    }
    return 0.0;
}
/**
 * Collects into a single set the elements of the sets extracted from the given views,
 * keeping the order of first appearance.
 *
 * @param views the views to inspect
 * @param mapper extracts a set of values from a view
 * @param <T> the type of the collected elements
 * @return the union of the extracted sets
 */
private <T> Set<T> findAll(final List<OrganizationView> views, final Function<OrganizationView, Set<T>> mapper) {
    final Set<T> merged = new LinkedHashSet<>();
    for (final OrganizationView view : views) {
        merged.addAll(mapper.apply(view));
    }
    return merged;
}
private List<Relationship> hideConflictOrgs(final String masterId, final String otherId) {
2020-10-07 17:04:29 +02:00
organizationRepository.updateStatus(otherId, OrganizationStatus.hidden.toString());
openaireConflictRepository.findById(new OpenaireConflictPK(masterId, otherId)).ifPresent(openaireConflictRepository::delete);
openaireConflictRepository.findById(new OpenaireConflictPK(otherId, masterId)).ifPresent(openaireConflictRepository::delete);
return makeRelation(masterId, otherId, RelationType.Merges);
}
2020-07-03 12:09:22 +02:00
}