dnet-applications/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/utils/DatabaseUtils.java

320 lines
12 KiB
Java
Raw Normal View History

2020-07-03 12:09:22 +02:00
package eu.dnetlib.organizations.utils;
import java.time.OffsetDateTime;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.stream.Collectors;
import javax.transaction.Transactional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
2020-07-03 12:09:22 +02:00
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.jdbc.core.BeanPropertyRowMapper;
2020-07-03 12:09:22 +02:00
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestBody;
import eu.dnetlib.organizations.controller.UserRole;
import eu.dnetlib.organizations.model.Acronym;
import eu.dnetlib.organizations.model.OpenaireConflict;
import eu.dnetlib.organizations.model.OpenaireConflictPK;
2020-09-29 15:31:56 +02:00
import eu.dnetlib.organizations.model.OpenaireDuplicate;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.model.Organization;
import eu.dnetlib.organizations.model.OtherIdentifier;
import eu.dnetlib.organizations.model.OtherName;
import eu.dnetlib.organizations.model.Relationship;
import eu.dnetlib.organizations.model.Url;
import eu.dnetlib.organizations.model.User;
import eu.dnetlib.organizations.model.UserCountry;
import eu.dnetlib.organizations.model.utils.BrowseEntry;
import eu.dnetlib.organizations.model.utils.OrganizationConflict;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.model.view.OrganizationView;
import eu.dnetlib.organizations.model.view.UserView;
import eu.dnetlib.organizations.repository.AcronymRepository;
import eu.dnetlib.organizations.repository.OpenaireConflictRepository;
2020-09-29 15:31:56 +02:00
import eu.dnetlib.organizations.repository.OpenaireDuplicateRepository;
2020-07-03 12:09:22 +02:00
import eu.dnetlib.organizations.repository.OrganizationRepository;
import eu.dnetlib.organizations.repository.OtherIdentifierRepository;
import eu.dnetlib.organizations.repository.OtherNameRepository;
import eu.dnetlib.organizations.repository.RelationshipRepository;
import eu.dnetlib.organizations.repository.UrlRepository;
import eu.dnetlib.organizations.repository.UserCountryRepository;
import eu.dnetlib.organizations.repository.UserRepository;
@Component
public class DatabaseUtils {
// Repositories injected by Spring, one per entity type handled by this class.
@Autowired
private AcronymRepository acronymRepository;
@Autowired
private OrganizationRepository organizationRepository;
@Autowired
private OtherIdentifierRepository otherIdentifierRepository;
@Autowired
private OtherNameRepository otherNameRepository;
@Autowired
private UrlRepository urlRepository;
@Autowired
private RelationshipRepository relationshipRepository;
@Autowired
private UserRepository userRepository;
@Autowired
private UserCountryRepository userCountryRepository;
@Autowired
private OpenaireConflictRepository openaireConflictRepository;
@Autowired
2020-09-29 15:31:56 +02:00
private OpenaireDuplicateRepository openaireDuplicateRepository;
@Autowired
2020-07-03 12:09:22 +02:00
// Used for the hand-written SQL in this class (vocabulary lookups, browse counts, conflict listing, relation import).
private JdbcTemplate jdbcTemplate;
// Logger shared by all instances of this class.
private static final Log log = LogFactory.getLog(DatabaseUtils.class);
2020-07-03 12:09:22 +02:00
/**
 * Vocabulary tables that can be listed through {@code listValuesOfVocabularyTable}.
 * Each constant is concatenated as-is into the SQL {@code from} clause, so the
 * constant names must match the table names.
 */
public enum VocabularyTable {
    languages, countries, org_types, id_types, rel_types, simrel_types
}
@Transactional
public String insertOrUpdateOrganization(final OrganizationView orgView, final String user, final boolean update) {
if (update) {
cleanOldRelations(orgView.getId());
}
final Organization org = new Organization(update ? orgView.getId() : null,
orgView.getName(),
orgView.getType(),
orgView.getLat(), orgView.getLng(),
orgView.getCity(), orgView.getCountry());
2020-07-03 12:09:22 +02:00
final String orgId = organizationRepository.save(org).getId();
makeNewRelations(orgView, orgId);
updateHistoryFields(orgId, user, update);
return orgId;
}
/**
 * Stamps the history fields of an organization with the current time.
 * The modification date is always written; the creation date is written
 * (before it) only when the organization is new.
 *
 * @param id     the organization id
 * @param user   the user to record
 * @param update {@code true} if an existing organization was updated
 */
private void updateHistoryFields(final String id, final String user, final boolean update) {
    final OffsetDateTime timestamp = OffsetDateTime.now();
    if (!update) {
        organizationRepository.updateCreationDate(id, user, timestamp);
    }
    organizationRepository.updateModificationDate(id, user, timestamp);
}
2020-09-29 15:31:56 +02:00
/**
 * Saves the given duplicates and records who modified them and when.
 * All saved rows receive the same timestamp, taken before the save.
 *
 * @param simrels the duplicates to persist
 * @param email   the user recorded as modifier
 * @return the list returned by the repository save
 */
@Transactional
public List<OpenaireDuplicate> saveDuplicates(final List<OpenaireDuplicate> simrels, final String email) {
    final OffsetDateTime modifiedAt = OffsetDateTime.now();
    final List<OpenaireDuplicate> saved = openaireDuplicateRepository.saveAll(simrels);
    for (final OpenaireDuplicate dup : saved) {
        openaireDuplicateRepository.updateModificationDate(dup.getLocalId(), dup.getOaOriginalId(), email, modifiedAt);
    }
    return saved;
}
2020-07-03 12:09:22 +02:00
/**
 * Persists the dependent rows of an organization (acronyms, other names,
 * other identifiers, urls) and creates its relationships via {@code makeRelation}.
 *
 * @param orgView the view holding the values to store
 * @param orgId   the id of the organization the rows belong to
 */
private void makeNewRelations(final OrganizationView orgView, final String orgId) {
    orgView.getAcronyms().forEach(acronym -> {
        acronymRepository.save(new Acronym(orgId, acronym));
    });

    orgView.getOtherNames().forEach(other -> {
        otherNameRepository.save(new OtherName(orgId, other.getName(), other.getLang()));
    });

    orgView.getOtherIdentifiers().forEach(other -> {
        otherIdentifierRepository.save(new OtherIdentifier(orgId, other.getId(), other.getType()));
    });

    orgView.getUrls().forEach(url -> {
        urlRepository.save(new Url(orgId, url));
    });

    orgView.getRelations().forEach(rel -> {
        final String relatedId = rel.getRelatedOrgId();
        final RelationType relType = RelationType.valueOf(rel.getType());
        makeRelation(orgId, relatedId, relType);
    });
}
/**
 * Deletes every dependent row of an organization: acronyms, other names,
 * other identifiers, urls, and relationships where the organization appears
 * on either side.
 *
 * @param orgId the organization id
 */
private void cleanOldRelations(final String orgId) {
    // Simple attribute tables.
    acronymRepository.deleteByOrgId(orgId);
    otherNameRepository.deleteByOrgId(orgId);
    otherIdentifierRepository.deleteByOrgId(orgId);
    urlRepository.deleteByOrgId(orgId);

    // Relationships are deleted for both sides (id1 and id2).
    relationshipRepository.deleteById1(orgId);
    relationshipRepository.deleteById2(orgId);
}
/**
 * Lists the {@code val} column of a vocabulary table. Results are cached
 * in the {@code vocs} cache.
 *
 * @param table the vocabulary table; its string form is used as the SQL table name
 * @return the values found in the table
 */
@Cacheable("vocs")
public List<String> listValuesOfVocabularyTable(final VocabularyTable table) {
    // The table name comes from an enum constant, not from free text.
    final String sql = "select val from " + table;
    return jdbcTemplate.queryForList(sql, String.class);
}
/**
 * Lists the countries associated with a user. Results are cached in the
 * {@code countries_for_user} cache.
 *
 * @param name the value matched against the {@code email} column of {@code user_countries}
 * @return the countries of that user
 */
@Cacheable("countries_for_user")
public List<String> listCountriesForUser(final String name) {
    final String sql = "select country from user_countries where email = ?";
    return jdbcTemplate.queryForList(sql, String.class, name);
}
@Transactional
public void saveUser(@RequestBody final UserView userView) {
final User user = userRepository.findById(userView.getEmail()).orElseThrow(() -> new RuntimeException("User not found"));
user.setRole(userView.getRole());
user.setValid(userView.isValid());
userRepository.save(user);
userCountryRepository.deleteByEmail(userView.getEmail());
if (userView.getCountries() != null) {
userCountryRepository
.saveAll(Arrays.stream(userView.getCountries()).map(c -> new UserCountry(userView.getEmail(), c)).collect(Collectors.toList()));
2020-07-03 12:09:22 +02:00
}
}
/**
 * Removes a user and the country rows attached to it.
 *
 * @param email the id of the user to delete
 */
@Transactional
public void deleteUser(final String email) {
    // Country rows go first, then the user row itself.
    userCountryRepository.deleteByEmail(email);
    userRepository.deleteById(email);
}
/**
 * Registers a new user with role {@code PENDING} and {@code valid = false},
 * optionally attaching a list of countries.
 *
 * @param email     the email of the new user
 * @param countries the countries to associate, or {@code null} for none
 */
@Transactional
public void newUser(final String email, final List<String> countries) {
    final User pending = new User();
    pending.setEmail(email);
    pending.setRole(UserRole.PENDING.name());
    pending.setValid(false);
    userRepository.save(pending);

    if (countries == null) {
        return;
    }

    final List<UserCountry> links = countries.stream()
            .map(country -> new UserCountry(email, country))
            .collect(Collectors.toList());
    userCountryRepository.saveAll(links);
}
@Transactional
public void verifyConflictGroups(final boolean forceUpdate) {
if (forceUpdate || openaireConflictRepository.countByGroupNull() > 0) {
2020-09-29 11:48:45 +02:00
log.info("Recreating conflicts group...");
2020-07-03 12:09:22 +02:00
openaireConflictRepository.resetGroupIds();
final Map<String, Set<String>> groups = new HashMap<>();
for (final OpenaireConflict w : openaireConflictRepository.findAll()) {
final List<String> list = findExistingGroupsForRel(w, groups);
if (list.isEmpty()) {
final String idGroup = generateGroupId();
groups.put(idGroup, new HashSet<>());
addToGroup(groups, idGroup, w);
} else if (list.size() == 1) {
addToGroup(groups, list.get(0), w);
} else {
final String idGroup = generateGroupId();
groups.put(idGroup, new TreeSet<>());
list.forEach(id -> groups.get(idGroup).addAll(groups.get(id)));
list.forEach(id -> groups.remove(id));
addToGroup(groups, idGroup, w);
}
}
for (final Entry<String, Set<String>> e : groups.entrySet()) {
final String gid = e.getKey();
for (final String orgId : e.getValue()) {
for (final OpenaireConflict oc : openaireConflictRepository.findById1AndGroupIsNull(orgId)) {
oc.setGroup(gid);
openaireConflictRepository.save(oc);
}
for (final OpenaireConflict oc : openaireConflictRepository.findById2AndGroupIsNull(orgId)) {
oc.setGroup(gid);
openaireConflictRepository.save(oc);
}
}
}
2020-09-29 11:48:45 +02:00
log.info("...conflicts group recreated");
2020-07-03 12:09:22 +02:00
}
}
/**
 * Builds a new group id: the prefix {@code group::} followed by a random UUID.
 *
 * @return the generated id
 */
private String generateGroupId() {
    final UUID uuid = UUID.randomUUID();
    return "group::" + uuid;
}
private List<String> findExistingGroupsForRel(final OpenaireConflict w, final Map<String, Set<String>> groups) {
return groups.entrySet()
.stream()
.filter(e -> {
return e.getValue().contains(w.getId1()) || e.getValue().contains(w.getId2());
})
.map(e -> e.getKey())
.distinct()
.collect(Collectors.toList());
2020-07-03 12:09:22 +02:00
}
/**
 * Adds both ids of a conflict to the member set of a group.
 * The group must already be present in the map.
 *
 * @param groups the mapping group id -> member ids
 * @param gid    the id of the target group
 * @param w      the conflict providing the two ids
 */
private void addToGroup(final Map<String, Set<String>> groups, final String gid, final OpenaireConflict w) {
    final Set<String> members = groups.get(gid);
    members.add(w.getId1());
    members.add(w.getId2());
}
/**
 * Stores a relationship between two organizations together with its inverse.
 * For the merge types ({@code Merged_In}, {@code Merges}) any conflict
 * recorded between the two ids, in either direction, is deleted.
 *
 * @param id1  the first organization id
 * @param id2  the second organization id
 * @param type the type of the relation seen from {@code id1}
 * @return the two relationship objects passed to the repository (direct, inverse)
 */
@Transactional
public List<Relationship> makeRelation(final String id1, final String id2, final RelationType type) {
    final Relationship direct = new Relationship(id1, id2, type.toString());
    final Relationship inverse = new Relationship(id2, id1, type.getInverse().toString());

    relationshipRepository.save(direct);
    relationshipRepository.save(inverse);

    final boolean isMerge = type == RelationType.Merged_In || type == RelationType.Merges;
    if (isMerge) {
        // Merged organizations no longer conflict with each other.
        openaireConflictRepository
                .findById(new OpenaireConflictPK(id1, id2))
                .ifPresent(openaireConflictRepository::delete);
        openaireConflictRepository
                .findById(new OpenaireConflictPK(id2, id1))
                .ifPresent(openaireConflictRepository::delete);
    }

    return Arrays.asList(direct, inverse);
}
/**
 * Browse by country: counts the organizations per country, largest count first.
 *
 * @return one entry per country with its number of organizations
 */
public List<BrowseEntry> browseCountries() {
    final BeanPropertyRowMapper<BrowseEntry> mapper = new BeanPropertyRowMapper<>(BrowseEntry.class);
    return jdbcTemplate.query(
            "select country as value, count(*) as count from organizations group by country order by count desc",
            mapper);
}
/**
 * Browse by country for a user: counts the organizations per country,
 * restricted to the countries assigned to the given user, largest count first.
 *
 * @param email the email of the user
 * @return one entry per country with its number of organizations
 */
public List<BrowseEntry> browseCountriesForUser(final String email) {
    final String sql = "select o.country as value, count(o.country) as count "
            + "from user_countries uc "
            + "left outer join organizations o on (uc.country = o.country) "
            + "where uc.email=? "
            + "group by o.country "
            + "order by count desc";
    final BeanPropertyRowMapper<BrowseEntry> mapper = new BeanPropertyRowMapper<>(BrowseEntry.class);
    return jdbcTemplate.query(sql, mapper, email);
}
/**
 * Browse by organization type: counts the organizations per type, largest count first.
 *
 * @return one entry per type with its number of organizations
 */
public List<BrowseEntry> browseTypes() {
    final BeanPropertyRowMapper<BrowseEntry> mapper = new BeanPropertyRowMapper<>(BrowseEntry.class);
    return jdbcTemplate.query(
            "select type as value, count(*) as count from organizations group by type order by count desc",
            mapper);
}
/**
 * Browse by organization type for a user: counts the organizations per type,
 * restricted to organizations whose country is assigned to the given user,
 * largest count first.
 *
 * @param email the email of the user
 * @return one entry per type with its number of organizations
 */
public List<BrowseEntry> browseTypesForUser(final String email) {
    final BeanPropertyRowMapper<BrowseEntry> mapper = new BeanPropertyRowMapper<>(BrowseEntry.class);
    return jdbcTemplate.query(
            "select o.type as value, count(o.type) as count from organizations o left outer join user_countries uc on (uc.country = o.country) where uc.email=? group by o.type order by count desc;",
            mapper,
            email);
}
/**
 * Lists the organizations recorded as conflicting with the given one.
 * Only conflicts where the given id is {@code id1} are considered, and only
 * those whose {@code id2} matches an existing organization.
 *
 * @param id the organization id
 * @return id, name, type, city and country of each conflicting organization
 */
public List<OrganizationConflict> listConflictsForId(final String id) {
    final String sql = "select o.id, o.name, o.type, o.city, o.country "
            + "from oa_conflicts c "
            + "left outer join organizations o on (c.id2 = o.id) "
            + "where o.id is not null and c.id1 = ?";
    final BeanPropertyRowMapper<OrganizationConflict> mapper = new BeanPropertyRowMapper<>(OrganizationConflict.class);
    return jdbcTemplate.query(sql, mapper, id);
}
2020-09-29 11:48:45 +02:00
@Transactional
public void importSimRels() {
try {
2020-09-29 11:34:31 +02:00
log.info("Importing conflicts and duplicates...");
jdbcTemplate.update(IOUtils.toString(getClass().getResourceAsStream("/sql/importNewRels.sql")));
2020-09-29 11:34:31 +02:00
log.info("...done");
2020-09-29 11:48:45 +02:00
verifyConflictGroups(true);
} catch (final Exception e) {
2020-09-29 11:34:31 +02:00
log.error("Error importing conflicts and duplicates", e);
}
}
2020-07-03 12:09:22 +02:00
}