dnet-applications/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/utils/DatabaseUtils.java

458 lines
19 KiB
Java

package eu.dnetlib.organizations.utils;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.transaction.Transactional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.jdbc.core.BeanPropertyRowMapper;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestBody;
import eu.dnetlib.organizations.controller.UserRole;
import eu.dnetlib.organizations.model.Acronym;
import eu.dnetlib.organizations.model.OpenaireConflict;
import eu.dnetlib.organizations.model.OpenaireConflictPK;
import eu.dnetlib.organizations.model.OpenaireDuplicate;
import eu.dnetlib.organizations.model.Organization;
import eu.dnetlib.organizations.model.OtherIdentifier;
import eu.dnetlib.organizations.model.OtherName;
import eu.dnetlib.organizations.model.Relationship;
import eu.dnetlib.organizations.model.Url;
import eu.dnetlib.organizations.model.User;
import eu.dnetlib.organizations.model.UserCountry;
import eu.dnetlib.organizations.model.utils.BrowseEntry;
import eu.dnetlib.organizations.model.utils.OrganizationConflict;
import eu.dnetlib.organizations.model.utils.TempBrowseEntry;
import eu.dnetlib.organizations.model.utils.VocabularyTerm;
import eu.dnetlib.organizations.model.view.OrganizationView;
import eu.dnetlib.organizations.model.view.UserView;
import eu.dnetlib.organizations.repository.AcronymRepository;
import eu.dnetlib.organizations.repository.OpenaireConflictRepository;
import eu.dnetlib.organizations.repository.OpenaireDuplicateRepository;
import eu.dnetlib.organizations.repository.OrganizationRepository;
import eu.dnetlib.organizations.repository.OtherIdentifierRepository;
import eu.dnetlib.organizations.repository.OtherNameRepository;
import eu.dnetlib.organizations.repository.RelationshipRepository;
import eu.dnetlib.organizations.repository.UrlRepository;
import eu.dnetlib.organizations.repository.UserCountryRepository;
import eu.dnetlib.organizations.repository.UserRepository;
import eu.dnetlib.organizations.repository.readonly.OrganizationViewRepository;
/**
 * Spring component that groups the database operations used by the organizations application:
 * saving organizations and their related records, managing users and their countries,
 * browsing organizations by country/type, and handling duplicates and conflicts.
 *
 * Most write operations are exposed as {@code @Transactional} methods.
 */
@Component
public class DatabaseUtils {
// --- Repositories injected by Spring ---
// Acronyms attached to an organization
@Autowired
private AcronymRepository acronymRepository;
// Organization records (status, creation/modification dates)
@Autowired
private OrganizationRepository organizationRepository;
// Additional identifiers attached to an organization
@Autowired
private OtherIdentifierRepository otherIdentifierRepository;
// Alternative names attached to an organization
@Autowired
private OtherNameRepository otherNameRepository;
// URLs attached to an organization
@Autowired
private UrlRepository urlRepository;
// Typed relationships between pairs of organizations
@Autowired
private RelationshipRepository relationshipRepository;
// Application users, keyed by email
@Autowired
private UserRepository userRepository;
// Countries assigned to each user
@Autowired
private UserCountryRepository userCountryRepository;
// Conflicts between pairs of organizations
@Autowired
private OpenaireConflictRepository openaireConflictRepository;
// Duplicate links between a local organization id and an original id
@Autowired
private OpenaireDuplicateRepository openaireDuplicateRepository;
// View used to read back a full organization after an update
@Autowired
private OrganizationViewRepository organizationViewRepository;
// Used for the hand-written SQL queries (vocabularies, browse counts, conflicts, import script)
@Autowired
private JdbcTemplate jdbcTemplate;
private static final Log log = LogFactory.getLog(DatabaseUtils.class);
/**
 * Vocabulary tables that can be listed with {@code listValuesOfVocabularyTable}.
 *
 * The constant name is concatenated verbatim into the SQL text as the table name,
 * so each constant must be spelled exactly like the corresponding database table.
 */
public enum VocabularyTable {
languages,
countries,
org_types,
id_types,
rel_types,
simrel_types
}
/**
 * Saves the organization described by {@code orgView} and rebuilds its related records.
 *
 * Status rules:
 * <ul>
 * <li>a non-simple user (admin) always produces an approved organization;</li>
 * <li>a simple user creating a record with no stored status produces a suggested organization;</li>
 * <li>a simple user editing an already approved organization keeps it approved;</li>
 * <li>any other combination is rejected.</li>
 * </ul>
 *
 * If the incoming id is blank or does not start with {@code OpenOrgsConstants.OPENORGS_PREFIX}, the id is
 * cleared before saving. When the saved id differs from a non-blank incoming id, the old record is marked
 * as duplicate and linked to the new one, together with the duplicates already attached to the old id.
 *
 * @param orgView      the organization data; its id may be reset to {@code null} by this method
 * @param user         the user recorded on creation/modification dates
 * @param isSimpleUser {@code true} for a regular user, {@code false} for an admin
 * @return the id of the saved organization
 * @throws RuntimeException with message "User not authorized" when a simple user edits a non-approved organization
 */
@Transactional
public String insertOrUpdateOrganization(final OrganizationView orgView, final String user, final boolean isSimpleUser) {
    final String incomingId = orgView.getId();

    // Status currently stored for this id, if any
    String oldStatus = null;
    if (incomingId != null) {
        oldStatus = organizationRepository.findById(incomingId).map(Organization::getStatus).orElse(null);
    }

    final String approved = OrganizationStatus.approved.toString();
    final String newStatus;
    if (!isSimpleUser || StringUtils.equals(oldStatus, approved)) {
        // admins always approve; simple users may only touch what is already approved
        newStatus = approved;
    } else if (oldStatus == null) {
        newStatus = OrganizationStatus.suggested.toString();
    } else {
        throw new RuntimeException("User not authorized");
    }

    final String oldId = StringUtils.isNotBlank(incomingId) ? incomingId : null;
    final boolean hasOpenOrgsId = oldId != null && oldId.startsWith(OpenOrgsConstants.OPENORGS_PREFIX);
    if (!hasOpenOrgsId) {
        // the id is cleared so that the saved record gets a different id than the incoming one
        orgView.setId(null);
    }

    final Organization org = new Organization(
            orgView.getId(),
            orgView.getName(),
            orgView.getType(),
            orgView.getLat(),
            orgView.getLng(),
            orgView.getCity(),
            orgView.getCountry(),
            newStatus);
    final String newId = organizationRepository.save(org).getId();

    final OffsetDateTime now = OffsetDateTime.now();
    organizationRepository.updateModificationDate(newId, user, now);

    final boolean sameRecord = StringUtils.equals(newId, oldId);
    if (!sameRecord) {
        organizationRepository.updateCreationDate(newId, user, now);
    }
    makeRelations(newId, orgView, sameRecord);

    if (sameRecord || oldId == null) {
        return newId;
    }

    // The old record has been replaced: link it (and its own duplicates) to the new id
    final List<OpenaireDuplicate> dups = new ArrayList<>();
    dups.add(new OpenaireDuplicate(newId, oldId, SimilarityType.is_similar.toString()));
    openaireDuplicateRepository.findByLocalId(oldId)
            .forEach(d -> dups.add(new OpenaireDuplicate(newId, d.getOaOriginalId(), SimilarityType.suggested.toString())));
    openaireDuplicateRepository.saveAll(dups);
    for (final OpenaireDuplicate d : dups) {
        openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now);
    }

    organizationRepository.updateStatus(oldId, OrganizationStatus.duplicate.toString());
    organizationRepository.updateModificationDate(oldId, user, now);

    return newId;
}
/**
 * Saves the given duplicate links and aligns the status of the referenced organizations.
 *
 * For every saved link the modification date is updated; the organization identified by the link's
 * original id becomes {@code suggested} when the link type is {@code is_different}, otherwise {@code duplicate}.
 *
 * @param simrels the duplicate links to save
 * @param user    the user recorded on the modification dates
 */
@Transactional
public void saveDuplicates(final List<OpenaireDuplicate> simrels, final String user) {
    final OffsetDateTime now = OffsetDateTime.now();
    final String differentType = SimilarityType.is_different.toString();

    for (final OpenaireDuplicate dup : openaireDuplicateRepository.saveAll(simrels)) {
        openaireDuplicateRepository.updateModificationDate(dup.getLocalId(), dup.getOaOriginalId(), user, now);

        final OrganizationStatus targetStatus = dup.getRelType().equals(differentType)
                ? OrganizationStatus.suggested
                : OrganizationStatus.duplicate;
        updateStatus(dup.getOaOriginalId(), targetStatus, user, now);
    }
}
/**
 * Stores the acronyms, other names, other identifiers, urls and relationships of an organization.
 *
 * @param orgId   the id of the organization owning the records
 * @param orgView the source of the records to store
 * @param update  when {@code true}, the records already stored for {@code orgId} are deleted first
 */
private void makeRelations(final String orgId, final OrganizationView orgView, final boolean update) {
    if (update) {
        deleteExistingRelations(orgId);
    }

    orgView.getAcronyms()
            .forEach(acronym -> acronymRepository.save(new Acronym(orgId, acronym)));
    orgView.getOtherNames()
            .forEach(other -> otherNameRepository.save(new OtherName(orgId, other.getName(), other.getLang())));
    orgView.getOtherIdentifiers()
            .forEach(other -> otherIdentifierRepository.save(new OtherIdentifier(orgId, other.getId(), other.getType())));
    orgView.getUrls()
            .forEach(url -> urlRepository.save(new Url(orgId, url)));
    orgView.getRelations()
            .forEach(rel -> makeRelation(orgId, rel.getRelatedOrgId(), RelationType.valueOf(rel.getType())));
}

/** Deletes every acronym, name, identifier, url and relationship (in both directions) stored for the organization. */
private void deleteExistingRelations(final String orgId) {
    acronymRepository.deleteByOrgId(orgId);
    otherNameRepository.deleteByOrgId(orgId);
    otherIdentifierRepository.deleteByOrgId(orgId);
    urlRepository.deleteByOrgId(orgId);
    relationshipRepository.deleteById1(orgId);
    relationshipRepository.deleteById2(orgId);
}
/**
 * Lists the terms (value and name) of a vocabulary table.
 * Results are cached in the cache named "vocs".
 *
 * @param table the vocabulary table to read; it is appended to the SQL text as the table name
 * @return the terms found in the table
 */
@Cacheable("vocs")
public List<VocabularyTerm> listValuesOfVocabularyTable(final VocabularyTable table) {
    final BeanPropertyRowMapper<VocabularyTerm> termMapper = new BeanPropertyRowMapper<>(VocabularyTerm.class);
    return jdbcTemplate.query("select val as value, name as name from " + table, termMapper);
}
/**
 * Lists the countries assigned to a user, with their names taken from the countries vocabulary.
 * Results are cached in the cache named "countries_for_user".
 *
 * @param name the value matched against the {@code email} column of {@code user_countries}
 * @return one term per assigned country (the name is null when the country is not in the vocabulary, because of the outer join)
 */
@Cacheable("countries_for_user")
public List<VocabularyTerm> listCountriesForUser(final String name) {
    final BeanPropertyRowMapper<VocabularyTerm> termMapper = new BeanPropertyRowMapper<>(VocabularyTerm.class);
    return jdbcTemplate.query(
            "select uc.country as value, c.name as name "
                    + "from user_countries uc "
                    + "left outer join countries c on (c.val = uc.country) "
                    + "where uc.email = ?",
            termMapper,
            name);
}
/**
 * Updates the role, the validity flag and the assigned countries of an existing user.
 *
 * The countries previously assigned to the user are always deleted; the new ones are stored only
 * when {@code userView.getCountries()} is not null.
 *
 * The parameter carries no web-binding annotation: this class is a plain component, not a request
 * handler, so request-body binding belongs to the calling controller.
 *
 * @param userView the user data; its email identifies the user to update
 * @throws RuntimeException with message "User not found" when no user exists for that email
 */
@Transactional
public void saveUser(final UserView userView) {
    final String email = userView.getEmail();

    final User user = userRepository.findById(email).orElseThrow(() -> new RuntimeException("User not found"));
    user.setRole(userView.getRole());
    user.setValid(userView.isValid());
    userRepository.save(user);

    // Replace the whole set of countries rather than computing a diff
    userCountryRepository.deleteByEmail(email);
    if (userView.getCountries() != null) {
        final List<UserCountry> countries = Arrays.stream(userView.getCountries())
                .map(c -> new UserCountry(email, c))
                .collect(Collectors.toList());
        userCountryRepository.saveAll(countries);
    }
}
/**
 * Deletes a user together with the countries assigned to it.
 *
 * @param email the email identifying the user
 */
@Transactional
public void deleteUser(final String email) {
    // country assignments are removed before the user record itself
    userCountryRepository.deleteByEmail(email);

    userRepository.deleteById(email);
}
/**
 * Registers a new user with the {@code PENDING} role and the valid flag set to false.
 *
 * @param email     the email identifying the new user
 * @param countries the countries to assign to the user; ignored when null
 */
@Transactional
public void newUser(final String email, final List<String> countries) {
    final User pending = new User();
    pending.setEmail(email);
    pending.setRole(UserRole.PENDING.name());
    pending.setValid(false);
    userRepository.save(pending);

    if (countries == null) {
        return;
    }

    final List<UserCountry> assigned = new ArrayList<>();
    for (final String country : countries) {
        assigned.add(new UserCountry(email, country));
    }
    userCountryRepository.saveAll(assigned);
}
/*
* @Transactional public void verifyConflictGroups(final boolean forceUpdate) {
*
* if (forceUpdate || openaireConflictRepository.countByGroupNull() > 0) {
*
* log.info("Recreating conflicts group...");
*
* openaireConflictRepository.resetGroupIds();
*
* final Map<String, Set<String>> groups = new HashMap<>(); for (final OpenaireConflict w : openaireConflictRepository.findAll()) {
* final List<String> list = findExistingGroupsForRel(w, groups); if (list.isEmpty()) { final String idGroup = generateGroupId();
* groups.put(idGroup, new HashSet<>()); addToGroup(groups, idGroup, w); } else if (list.size() == 1) { addToGroup(groups, list.get(0),
* w); } else { final String idGroup = generateGroupId(); groups.put(idGroup, new TreeSet<>()); list.forEach(id ->
* groups.get(idGroup).addAll(groups.get(id))); list.forEach(id -> groups.remove(id)); addToGroup(groups, idGroup, w); } }
*
* for (final Entry<String, Set<String>> e : groups.entrySet()) { final String gid = e.getKey(); for (final String orgId : e.getValue())
* { for (final OpenaireConflict oc : openaireConflictRepository.findById1AndGroupIsNull(orgId)) { oc.setGroup(gid);
* openaireConflictRepository.save(oc); } for (final OpenaireConflict oc : openaireConflictRepository.findById2AndGroupIsNull(orgId)) {
* oc.setGroup(gid); openaireConflictRepository.save(oc); } } }
*
* log.info("...conflict groups recreated"); } }
*
* private String generateGroupId() { return "group::" + UUID.randomUUID(); }
*/
/**
 * Finds the groups that already contain at least one of the two organizations of a conflict.
 *
 * @param w      the conflict whose two ids are looked up
 * @param groups the groups, as a map from group id to the ids of its members
 * @return the ids of the matching groups, in the iteration order of the map
 */
private List<String> findExistingGroupsForRel(final OpenaireConflict w, final Map<String, Set<String>> groups) {
    final List<String> matchingGroups = new ArrayList<>();
    for (final Map.Entry<String, Set<String>> group : groups.entrySet()) {
        final Set<String> members = group.getValue();
        if (members.contains(w.getId1()) || members.contains(w.getId2())) {
            // map keys are unique, so no de-duplication is needed
            matchingGroups.add(group.getKey());
        }
    }
    return matchingGroups;
}
/**
 * Adds both organizations of a conflict to an existing group.
 *
 * @param groups the groups, as a map from group id to the ids of its members
 * @param gid    the id of the group to extend; it must already be present in {@code groups}
 * @param w      the conflict providing the two ids to add
 */
private void addToGroup(final Map<String, Set<String>> groups, final String gid, final OpenaireConflict w) {
    final Set<String> firstLookup = groups.get(gid);
    firstLookup.add(w.getId1());

    final Set<String> secondLookup = groups.get(gid);
    secondLookup.add(w.getId2());
}
/**
 * Stores a relationship between two organizations in both directions.
 *
 * @param id1  the id of the first organization
 * @param id2  the id of the second organization
 * @param type the type of the relation from {@code id1} to {@code id2}; its inverse is used for the opposite direction
 * @return the two relationship objects that were passed to the repository (direct first, inverse second)
 */
@Transactional
public List<Relationship> makeRelation(final String id1, final String id2, final RelationType type) {
    final Relationship direct = new Relationship(id1, id2, type.toString());
    final Relationship inverse = new Relationship(id2, id1, type.getInverse().toString());

    final List<Relationship> pair = Arrays.asList(direct, inverse);
    for (final Relationship rel : pair) {
        relationshipRepository.save(rel);
    }
    return pair;
}
/**
 * Browse by country: counts the organizations of every country, split by status.
 *
 * @return one entry per country, sorted by country name
 */
public List<BrowseEntry> browseCountries() {
    return listBrowseEntries(
            "select o.country as code, c.name as name, o.status as group, count(o.status) as count "
                    + "from organizations o "
                    + "left outer join countries c on (o.country = c.val) "
                    + "group by o.country, c.name, o.status");
}
/**
 * Browse by country for a user: counts the organizations, split by status, of the countries assigned to the user.
 *
 * @param email the email identifying the user
 * @return one entry per country, sorted by country name
 */
public List<BrowseEntry> browseCountriesForUser(final String email) {
    return listBrowseEntries(
            "select o.country as code, c.name as name, o.status as group, count(o.status) as count "
                    + "from user_countries uc "
                    + "left outer join organizations o on (uc.country = o.country) "
                    + "left outer join countries c on (o.country = c.val) "
                    + "where uc.email=? "
                    + "group by o.country, c.name, o.status",
            email);
}
/**
 * Browse by organization type: counts the organizations of every type, split by status.
 * The type is used both as code and as name of the entry.
 *
 * @return one entry per type, sorted by name
 */
public List<BrowseEntry> browseTypes() {
    return listBrowseEntries(
            "select type as code, type as name, status as group, count(status) as count "
                    + "from organizations "
                    + "group by type, status");
}
/**
 * Browse by organization type for a user: counts the organizations of every type, split by status,
 * restricted to the countries assigned to the user.
 *
 * @param email the email identifying the user
 * @return one entry per type, sorted by name
 */
public List<BrowseEntry> browseTypesForUser(final String email) {
    final String query =
            "select o.type as code, o.type as name,o.status as group, count(o.status) as count from organizations o left outer join user_countries uc on (uc.country = o.country) where uc.email=? group by o.type, o.status";
    return listBrowseEntries(query, email);
}
/**
 * Runs a browse query and folds its rows into one entry per code.
 *
 * Every row provides a code, a name, a group and a count: rows sharing the same code are merged into a single
 * entry whose values map each group to its count. Rows with a blank code are skipped.
 *
 * @param sql    the query to run; its columns must be named code, name, group and count
 * @param params the values bound to the placeholders of the query
 * @return the entries sorted by name
 */
private List<BrowseEntry> listBrowseEntries(final String sql, final Object... params) {
    final BeanPropertyRowMapper<TempBrowseEntry> rowMapper = new BeanPropertyRowMapper<>(TempBrowseEntry.class);
    final Map<String, BrowseEntry> entriesByCode = new HashMap<>();

    for (final TempBrowseEntry row : jdbcTemplate.query(sql, rowMapper, params)) {
        final String code = row.getCode();
        if (StringUtils.isBlank(code)) {
            continue;
        }
        final BrowseEntry entry = entriesByCode.computeIfAbsent(code, key -> {
            final BrowseEntry created = new BrowseEntry();
            created.setCode(key);
            created.setName(row.getName());
            return created;
        });
        entry.getValues().put(row.getGroup(), row.getCount());
    }

    final List<BrowseEntry> sortedEntries = new ArrayList<>(entriesByCode.values());
    sortedEntries.sort((left, right) -> StringUtils.compare(left.getName(), right.getName()));
    return sortedEntries;
}
/**
 * Lists the organizations recorded as conflicting with the given one.
 * Only the conflicts having the given id in the {@code id1} column are considered, and only
 * when the organization on the other side exists.
 *
 * @param id the id of the organization
 * @return id, name, type, city and country of each conflicting organization
 */
public List<OrganizationConflict> listConflictsForId(final String id) {
    final BeanPropertyRowMapper<OrganizationConflict> conflictMapper = new BeanPropertyRowMapper<>(OrganizationConflict.class);
    return jdbcTemplate.query(
            "select o.id, o.name, o.type, o.city, o.country "
                    + "from oa_conflicts c "
                    + "left outer join organizations o on (c.id2 = o.id) "
                    + "where o.id is not null and c.id1 = ?",
            conflictMapper,
            id);
}
/**
 * Imports conflicts and duplicates by executing the SQL script stored in the classpath resource
 * {@code /sql/importDedupEvents.sql}.
 *
 * The script is read as UTF-8 (instead of the platform default charset) and the resource stream is
 * always closed. Failures, including a missing resource, are logged and not propagated to the caller.
 */
@Transactional
public void importDedupEvents() {
    final String resource = "/sql/importDedupEvents.sql";

    log.info("Importing conflicts and duplicates...");
    // try-with-resources accepts a null resource, so the explicit check below is what reports a missing script
    try (final InputStream in = getClass().getResourceAsStream(resource)) {
        if (in == null) {
            throw new IllegalStateException("SQL resource not found on classpath: " + resource);
        }
        jdbcTemplate.update(IOUtils.toString(in, StandardCharsets.UTF_8));
        log.info("...done");
        // verifyConflictGroups(true);
    } catch (final Exception e) {
        // best-effort import: the error is reported in the log only
        log.error("Error importing conflicts and duplicates", e);
    }
}
/**
 * Sets the status of an organization to {@code deleted}.
 *
 * @param id   the id of the organization
 * @param user the user recorded on the modification date
 * @return the view of the organization read back after the update
 * @throws java.util.NoSuchElementException when no view exists for the id
 */
@Transactional
public OrganizationView markAsDeleted(final String id, final String user) {
    updateStatus(id, OrganizationStatus.deleted, user, OffsetDateTime.now());

    final Optional<OrganizationView> updated = organizationViewRepository.findById(id);
    return updated.get();
}
/**
 * Sets the status of an organization to {@code discarded} and sets to {@code suggested} every
 * organization referenced as original id by the duplicate links of the discarded one.
 *
 * @param id   the id of the organization to discard
 * @param user the user recorded on the modification dates
 * @return the view of the organization read back after the update
 * @throws java.util.NoSuchElementException when no view exists for the id
 */
@Transactional
public OrganizationView markAsDiscarded(final String id, final String user) {
    final OffsetDateTime now = OffsetDateTime.now();

    updateStatus(id, OrganizationStatus.discarded, user, now);
    for (final OpenaireDuplicate dup : openaireDuplicateRepository.findByLocalId(id)) {
        updateStatus(dup.getOaOriginalId(), OrganizationStatus.suggested, user, now);
    }

    final Optional<OrganizationView> updated = organizationViewRepository.findById(id);
    return updated.get();
}
/**
 * Changes the status of an organization and records who changed it and when.
 *
 * @param id     the id of the organization
 * @param status the new status
 * @param user   the user recorded on the modification date
 * @param now    the timestamp recorded as modification date
 */
private void updateStatus(final String id, final OrganizationStatus status, final String user, final OffsetDateTime now) {
    final String statusName = status.toString();
    organizationRepository.updateStatus(id, statusName);

    organizationRepository.updateModificationDate(id, user, now);
}
/**
 * Resolves a conflict by merging the given organizations into a newly created one.
 *
 * Steps:
 * <ol>
 * <li>a new organization is built from the existing views: the first non-blank value is used for the scalar
 * fields, the union of all values for the collections; the names of the merged organizations that differ
 * from the chosen name are kept as other names with language "UNKNOWN";</li>
 * <li>the new organization is saved as an admin would (so it gets the approved status);</li>
 * <li>every merged organization is hidden and linked to the new one with a {@code Merges} relation;</li>
 * <li>the {@code is_similar} duplicates of the merged organizations are re-created for the new organization
 * and persisted;</li>
 * <li>the conflicts between every pair of merged organizations are marked as {@code is_similar}.</li>
 * </ol>
 *
 * @param ids  the ids of the conflicting organizations to merge; ids without a view are ignored when
 *             building the new organization
 * @param user the user recorded on creation/modification dates
 * @return the id of the new organization
 */
@Transactional
public String fixConflict(final List<String> ids, final String user) {
    final List<OrganizationView> views =
            ids.stream().map(organizationViewRepository::findById).filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList());

    // I create a new org
    final OrganizationView newOrg = new OrganizationView();
    newOrg.setId(null);
    newOrg.setStatus(null);
    newOrg.setName(findFirstString(views, OrganizationView::getName));
    newOrg.setType(findFirstString(views, OrganizationView::getType));
    newOrg.setLat(findFirstNumber(views, OrganizationView::getLat));
    newOrg.setLng(findFirstNumber(views, OrganizationView::getLng));
    newOrg.setCity(findFirstString(views, OrganizationView::getCity));
    newOrg.setCountry(findFirstString(views, OrganizationView::getCountry));
    newOrg.setOtherIdentifiers(findAll(views, OrganizationView::getOtherIdentifiers));
    newOrg.setOtherNames(findAll(views, OrganizationView::getOtherNames));
    newOrg.setAcronyms(findAll(views, OrganizationView::getAcronyms));
    newOrg.setUrls(findAll(views, OrganizationView::getUrls));
    newOrg.setRelations(findAll(views, OrganizationView::getRelations));

    // The names that were NOT chosen as main name would otherwise be lost: keep them as other names.
    // The main name itself is excluded (case-insensitively) to avoid repeating it.
    newOrg.getOtherNames()
            .addAll(views.stream()
                    .map(OrganizationView::getName)
                    .filter(StringUtils::isNotBlank)
                    .filter(s -> !StringUtils.equalsIgnoreCase(s, newOrg.getName()))
                    .map(s -> new eu.dnetlib.organizations.model.view.OtherName(s, "UNKNOWN"))
                    .collect(Collectors.toList()));

    final String masterId = insertOrUpdateOrganization(newOrg, user, false);

    // I hide the merged organizations
    ids.forEach(id -> hideConflictOrgs(masterId, id));

    final OffsetDateTime now = OffsetDateTime.now();

    // I reassign the duplicated to the new org
    final List<OpenaireDuplicate> newDuplicates = ids.stream()
            .map(openaireDuplicateRepository::findByLocalId)
            .flatMap(l -> l.stream())
            .filter(d -> d.getRelType().equals(SimilarityType.is_similar.toString()))
            .map(d -> new OpenaireDuplicate(masterId, d.getOaOriginalId(), d.getRelType()))
            .collect(Collectors.toList());
    // The reassigned links must be persisted, otherwise the new organization has no duplicates at all
    openaireDuplicateRepository.saveAll(newDuplicates);
    newDuplicates.forEach(d -> openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now));

    // Every pair of merged organizations is now known to be the same organization
    for (int i = 0; i < ids.size(); i++) {
        for (int j = i + 1; j < ids.size(); j++) {
            openaireConflictRepository.updateStatus(ids.get(i), ids.get(j), SimilarityType.is_similar.toString(), user, now);
        }
    }

    return masterId;
}
/**
 * Returns the first non-blank string extracted from the views.
 *
 * @param views  the views to scan, in order
 * @param mapper the function extracting the string from a view
 * @return the first non-blank value, or {@code null} when there is none
 */
private String findFirstString(final List<OrganizationView> views, final Function<OrganizationView, String> mapper) {
    for (final OrganizationView view : views) {
        final String candidate = mapper.apply(view);
        if (StringUtils.isNotBlank(candidate)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Returns the first number extracted from the views that is neither null nor zero.
 *
 * @param views  the views to scan, in order
 * @param mapper the function extracting the number from a view
 * @return the first non-null, non-zero value, or {@code 0.0} when there is none
 */
private Double findFirstNumber(final List<OrganizationView> views, final Function<OrganizationView, Double> mapper) {
    for (final OrganizationView view : views) {
        final Double candidate = mapper.apply(view);
        if (candidate != null && candidate != 0) {
            return candidate;
        }
    }
    return 0.0;
}
/**
 * Collects the union of the sets extracted from the views, keeping the order of first appearance.
 *
 * @param views  the views to scan, in order
 * @param mapper the function extracting a set from a view; it must not return null
 * @return a new insertion-ordered set with all the extracted elements
 */
private <T> Set<T> findAll(final List<OrganizationView> views, final Function<OrganizationView, Set<T>> mapper) {
    final Set<T> union = new LinkedHashSet<>();
    for (final OrganizationView view : views) {
        union.addAll(mapper.apply(view));
    }
    return union;
}
/**
 * Hides an organization that has been merged into a master organization.
 *
 * The status of the merged organization becomes {@code hidden}, the conflicts between the two organizations
 * (in either direction) are deleted, and a {@code Merges} relation is stored from the master to the merged one.
 *
 * @param masterId the id of the organization that absorbs the other one
 * @param otherId  the id of the merged organization
 * @return the two relationship objects created for the merge (direct and inverse)
 */
private List<Relationship> hideConflictOrgs(final String masterId, final String otherId) {
    organizationRepository.updateStatus(otherId, OrganizationStatus.hidden.toString());

    final OpenaireConflictPK forward = new OpenaireConflictPK(masterId, otherId);
    openaireConflictRepository.findById(forward).ifPresent(openaireConflictRepository::delete);

    final OpenaireConflictPK backward = new OpenaireConflictPK(otherId, masterId);
    openaireConflictRepository.findById(backward).ifPresent(openaireConflictRepository::delete);

    return makeRelation(masterId, otherId, RelationType.Merges);
}
}