sql for import dedup_events

This commit is contained in:
Michele Artini 2020-10-14 16:28:00 +02:00
parent aa24a5475d
commit f3cec6ab87
13 changed files with 95 additions and 101 deletions

View File

@ -32,7 +32,7 @@ public class MainApplication {
return new Docket(DocumentationType.SWAGGER_2)
.select()
.apis(RequestHandlerSelectors.any())
.paths(p -> p.startsWith("/api/"))
.paths(p -> p.startsWith("/api/") || p.startsWith("/oa_api"))
.build()
.apiInfo(new ApiInfoBuilder()
.title("D-Net Organizations Service APIs")

View File

@ -23,12 +23,12 @@ public class MyAccessDeniedHandler implements AccessDeniedHandler {
@Override
public void handle(final HttpServletRequest req, final HttpServletResponse res, final AccessDeniedException e)
throws IOException, ServletException {
throws IOException, ServletException {
final Authentication auth = SecurityContextHolder.getContext().getAuthentication();
if (auth != null) {
logger.warn(String.format("User '%s' attempted to access the protected URL: %s", auth.getName(), req.getRequestURI()));
logger.warn(String.format("User '%s' (%s) attempted to access the protected URL: %s", auth.getName(), req.getRemoteAddr(), req.getRequestURI()));
}
if (UserInfo.isNotAuthorized(auth)) {

View File

@ -33,7 +33,8 @@ public class OpenaireInternalApiController {
log.warn("Call received by blaklisted ip (https proxy): " + req.getRemoteAddr());
throw new RuntimeException("Call received by blaklisted ip (https proxy): " + req.getRemoteAddr());
}
new Thread(databaseUtils::importDedupEvents).run();
new Thread(databaseUtils::importDedupEvents).start();
return Arrays.asList("Importing simrels (request from " + req.getRemoteAddr() + ") ...");
}
}

View File

@ -139,7 +139,7 @@ public class OrganizationController {
@GetMapping("/conflicts/byCountry/{country}")
public Collection<Set<OrganizationConflict>> findConflictsByCountry(@PathVariable final String country, final Authentication authentication) {
databaseUtils.verifyConflictGroups(false);
// databaseUtils.verifyConflictGroups(false);
if (UserInfo.isSuperAdmin(authentication)) {
return groupConflicts(conflictGroupViewRepository.findByCountry1OrCountry2(country, country).stream());
@ -234,7 +234,7 @@ public class OrganizationController {
}
@GetMapping("/byCountry/{status}/{code}")
public Iterable<OrganizationSimpleView> findPendingOrgsByCountry(@PathVariable final String status,
public Iterable<OrganizationSimpleView> findOrgsByStatusAndCountry(@PathVariable final String status,
@PathVariable final String code,
final Authentication authentication) {
if (UserInfo.isSuperAdmin(authentication) || userCountryRepository.verifyAuthorizationForCountry(code, authentication.getName())) {

View File

@ -3,17 +3,14 @@ package eu.dnetlib.organizations.utils;
import java.time.OffsetDateTime;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.stream.Collectors;
import javax.transaction.Transactional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -204,54 +201,30 @@ public class DatabaseUtils {
}
}
/**
 * Recomputes the group id stored on every conflict record.
 *
 * The work is done only when {@code forceUpdate} is true or the repository reports at least one
 * conflict without a group ({@code countByGroupNull() > 0}); otherwise the method returns
 * without touching anything.
 *
 * @param forceUpdate when true, the groups are rebuilt even if no conflict lacks a group
 */
@Transactional
public void verifyConflictGroups(final boolean forceUpdate) {
if (forceUpdate || openaireConflictRepository.countByGroupNull() > 0) {
log.info("Recreating conflicts group...");
// NOTE(review): presumably clears the group id of every conflict so that the
// find...AndGroupIsNull lookups below see all rows - repository body not visible here.
openaireConflictRepository.resetGroupIds();
// Phase 1: build the groups in memory. Key = generated group id, value = ids collected for that group
// (the second phase iterates these values as organization ids).
final Map<String, Set<String>> groups = new HashMap<>();
for (final OpenaireConflict w : openaireConflictRepository.findAll()) {
// Ids of the groups already built that this conflict relates to (helper defined elsewhere in the class).
final List<String> list = findExistingGroupsForRel(w, groups);
if (list.isEmpty()) {
// No related group yet: open a new one for this conflict.
final String idGroup = generateGroupId();
groups.put(idGroup, new HashSet<>());
addToGroup(groups, idGroup, w);
} else if (list.size() == 1) {
// Exactly one related group: extend it.
addToGroup(groups, list.get(0), w);
} else {
// The conflict relates to several groups: merge them all into a fresh group,
// drop the old entries, then add the conflict to the merged group.
final String idGroup = generateGroupId();
// NOTE(review): a TreeSet is used here while the first branch uses a HashSet - confirm whether the ordering matters.
groups.put(idGroup, new TreeSet<>());
list.forEach(id -> groups.get(idGroup).addAll(groups.get(id)));
list.forEach(id -> groups.remove(id));
addToGroup(groups, idGroup, w);
}
}
// Phase 2: persist the result. For every organization id of every group, stamp the group id on the
// conflicts that reference that id (as id1 or as id2) and still have no group.
for (final Entry<String, Set<String>> e : groups.entrySet()) {
final String gid = e.getKey();
for (final String orgId : e.getValue()) {
for (final OpenaireConflict oc : openaireConflictRepository.findById1AndGroupIsNull(orgId)) {
oc.setGroup(gid);
openaireConflictRepository.save(oc);
}
for (final OpenaireConflict oc : openaireConflictRepository.findById2AndGroupIsNull(orgId)) {
oc.setGroup(gid);
openaireConflictRepository.save(oc);
}
}
}
log.info("...conflicts group recreated");
}
}
/**
 * Creates a new group identifier.
 *
 * @return the literal prefix {@code "group::"} followed by a randomly generated UUID
 */
private String generateGroupId() {
return "group::" + UUID.randomUUID();
}
/*
* @Transactional public void verifyConflictGroups(final boolean forceUpdate) {
*
* if (forceUpdate || openaireConflictRepository.countByGroupNull() > 0) {
*
* log.info("Recreating conflicts group...");
*
* openaireConflictRepository.resetGroupIds();
*
* final Map<String, Set<String>> groups = new HashMap<>(); for (final OpenaireConflict w : openaireConflictRepository.findAll()) {
* final List<String> list = findExistingGroupsForRel(w, groups); if (list.isEmpty()) { final String idGroup = generateGroupId();
* groups.put(idGroup, new HashSet<>()); addToGroup(groups, idGroup, w); } else if (list.size() == 1) { addToGroup(groups, list.get(0),
* w); } else { final String idGroup = generateGroupId(); groups.put(idGroup, new TreeSet<>()); list.forEach(id ->
* groups.get(idGroup).addAll(groups.get(id))); list.forEach(id -> groups.remove(id)); addToGroup(groups, idGroup, w); } }
*
* for (final Entry<String, Set<String>> e : groups.entrySet()) { final String gid = e.getKey(); for (final String orgId : e.getValue())
* { for (final OpenaireConflict oc : openaireConflictRepository.findById1AndGroupIsNull(orgId)) { oc.setGroup(gid);
* openaireConflictRepository.save(oc); } for (final OpenaireConflict oc : openaireConflictRepository.findById2AndGroupIsNull(orgId)) {
* oc.setGroup(gid); openaireConflictRepository.save(oc); } } }
*
* log.info("...conflict groups recreated"); } }
*
* private String generateGroupId() { return "group::" + UUID.randomUUID(); }
*/
private List<String> findExistingGroupsForRel(final OpenaireConflict w, final Map<String, Set<String>> groups) {
return groups.entrySet()
@ -337,9 +310,10 @@ public class DatabaseUtils {
@Transactional
public void importDedupEvents() {
try {
// log.info("Importing conflicts and duplicates...");
// jdbcTemplate.update(IOUtils.toString(getClass().getResourceAsStream("/sql/importNewRels.sql")));
// log.info("...done");
log.info("Importing conflicts and duplicates...");
jdbcTemplate.update(IOUtils.toString(getClass().getResourceAsStream("/sql/importDedupEvents.sql")));
log.info("...done");
// verifyConflictGroups(true);
} catch (final Exception e) {
log.error("Error importing conflicts and duplicates", e);

View File

@ -1,7 +1,7 @@
package eu.dnetlib.organizations.utils;
public enum OrganizationStatus {
pending,
suggested, // from user or dedup depends by created_by field
approved,
discarded,
hidden,

View File

@ -13,9 +13,9 @@ spring.jpa.properties.hibernate.hbm2dll.extra_physical_table_types = MATERIALIZE
spring.jpa.properties.hibernate.jdbc.lob.non_contextual_creation=true
spring.jpa.open-in-view=true
spring.jpa.properties.hibernate.show_sql=true
spring.jpa.properties.hibernate.use_sql_comments=true
spring.jpa.properties.hibernate.format_sql=true
spring.jpa.properties.hibernate.show_sql=false
spring.jpa.properties.hibernate.use_sql_comments=false
spring.jpa.properties.hibernate.format_sql=false
# the ICM private network
openaire.api.valid.subnet = 10.19.65.0/24

View File

@ -0,0 +1,32 @@
-- Imports the rows of tmp_dedup_events into organizations, acronyms, urls,
-- oa_duplicates and oa_conflicts. All statements run between BEGIN and COMMIT,
-- and every row written here is tagged with created_by = 'dedupWf'.
BEGIN;
-- Remove what a previous run of this import left behind: only rows that are
-- still 'suggested' and were created by 'dedupWf' are deleted.
DELETE FROM organizations WHERE status = 'suggested' and created_by = 'dedupWf';
DELETE FROM oa_duplicates WHERE reltype = 'suggested' and created_by = 'dedupWf';
DELETE FROM oa_conflicts WHERE reltype = 'suggested' and created_by = 'dedupWf';
-- FIX IMPORT DATA
-- Empty or missing countries are normalised to the placeholder 'UNKNOWN'.
UPDATE tmp_dedup_events SET oa_country = 'UNKNOWN' WHERE oa_country = '' OR oa_country IS NULL;
-- NEW ORGANIZATIONS
-- Only ids that do NOT start with the literal prefix 'openorgs____::' are inserted.
-- The underscores are backslash-escaped so LIKE treats them literally instead of as
-- single-character wildcards (assumes backslash is the active LIKE escape character).
-- Rows that collide with an existing key are silently skipped (ON CONFLICT DO NOTHING).
INSERT INTO organizations(id, name, country, status, created_by, modified_by) SELECT oa_original_id, oa_name, oa_country, 'suggested', 'dedupWf', 'dedupWf' FROM tmp_dedup_events WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' ON CONFLICT DO NOTHING;
-- NOTE(review): oa_acronym / oa_url are not filtered for NULL here; if the target
-- columns are part of the primary key a NULL value would be rejected - confirm the
-- temporary table never carries NULLs in these columns.
INSERT INTO acronyms(id, acronym) SELECT oa_original_id, oa_acronym FROM tmp_dedup_events WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' ON CONFLICT DO NOTHING;
INSERT INTO urls(id, url) SELECT oa_original_id, oa_url FROM tmp_dedup_events WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' ON CONFLICT DO NOTHING;
-- DUPLICATES
-- A duplicate links a non-empty local_id to a different, non-'openorgs____::' original id.
-- NOTE(review): oa_name is not supplied by this INSERT; if oa_duplicates.oa_name is
-- still declared NOT NULL without a default, the statement fails (ON CONFLICT does not
-- cover NOT NULL violations) - confirm against the current schema.
INSERT INTO oa_duplicates (local_id, oa_original_id, oa_collectedfrom, created_by)
SELECT local_id, oa_original_id, oa_collectedfrom, 'dedupWf'
FROM tmp_dedup_events
WHERE local_id IS NOT NULL AND local_id != '' AND oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id != oa_original_id
ON CONFLICT DO NOTHING;
-- CONFLICTS
-- A conflict links two different ids that BOTH start with 'openorgs____::';
-- the group id is copied from tmp_dedup_events.group_id.
INSERT INTO oa_conflicts (id1, id2, idgroup, created_by)
SELECT local_id, oa_original_id, group_id, 'dedupWf'
FROM tmp_dedup_events
WHERE local_id LIKE 'openorgs\_\_\_\_::%' AND oa_original_id LIKE 'openorgs\_\_\_\_::%' AND local_id != oa_original_id
ON CONFLICT DO NOTHING;
COMMIT;

View File

@ -1,15 +0,0 @@
-- Replaces the 'suggested' duplicates and conflicts with the content of tmp_simrels
-- and clears the group id of every conflict.
DELETE FROM oa_duplicates WHERE reltype = 'suggested';
DELETE FROM oa_conflicts WHERE reltype = 'suggested';
-- Group ids are reset for ALL conflicts, not only the 'suggested' ones.
UPDATE oa_conflicts SET idgroup = NULL;
-- Rows whose original id does not match the 'openorgs____::' pattern become duplicates.
-- NOTE(review): the underscores in the LIKE pattern are unescaped, so each one matches
-- any single character rather than a literal '_'.
INSERT INTO oa_duplicates (local_id, oa_original_id, oa_name, oa_acronym, oa_country, oa_url, oa_collectedfrom)
SELECT local_id, oa_original_id, oa_name, oa_acronym, oa_country, oa_url, oa_collectedfrom
FROM tmp_simrels
WHERE oa_original_id NOT LIKE 'openorgs____::%'
ON CONFLICT DO NOTHING;
-- Rows whose original id matches the pattern become conflicts (no group id is set here).
INSERT INTO oa_conflicts (id1, id2)
SELECT local_id, oa_original_id
FROM tmp_simrels
WHERE oa_original_id LIKE 'openorgs____::%'
ON CONFLICT DO NOTHING;

View File

@ -335,13 +335,13 @@ CREATE TABLE organizations (
creation_date timestamp with time zone DEFAULT now(),
modified_by text,
modification_date timestamp with time zone DEFAULT now(),
status text NOT NULL DEFAULT 'pending'
status text NOT NULL DEFAULT 'suggested'
);
CREATE INDEX organizations_type_idx ON organizations(type);
CREATE INDEX organizations_country_idx ON organizations(country);
CREATE TABLE other_ids (
id text REFERENCES organizations(id) ON UPDATE CASCADE,
id text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
otherid text,
type text REFERENCES id_types(val),
PRIMARY KEY (id, otherid, type)
@ -349,7 +349,7 @@ CREATE TABLE other_ids (
CREATE INDEX other_ids_id_idx ON other_ids(id);
CREATE TABLE other_names (
id text REFERENCES organizations(id) ON UPDATE CASCADE,
id text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
name text,
lang text REFERENCES languages(val),
PRIMARY KEY (id, name, lang)
@ -357,31 +357,31 @@ CREATE TABLE other_names (
CREATE INDEX other_names_id_idx ON other_names(id);
CREATE TABLE acronyms (
id text REFERENCES organizations(id) ON UPDATE CASCADE,
id text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
acronym text,
PRIMARY KEY (id, acronym)
);
CREATE INDEX acronyms_id_idx ON acronyms(id);
CREATE TABLE relationships (
id1 text REFERENCES organizations(id) ON UPDATE CASCADE,
id1 text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
reltype text,
id2 text REFERENCES organizations(id) ON UPDATE CASCADE,
id2 text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
PRIMARY KEY (id1, reltype, id2)
);
CREATE INDEX relationships_id1_idx ON relationships(id1);
CREATE INDEX relationships_id2_idx ON relationships(id2);
CREATE TABLE urls (
id text REFERENCES organizations(id) ON UPDATE CASCADE,
id text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
url text,
PRIMARY KEY (id, url)
);
CREATE INDEX urls_id_idx ON urls(id);
CREATE TABLE oa_duplicates (
local_id text REFERENCES organizations(id) ON UPDATE CASCADE,
oa_original_id text REFERENCES organizations(id) ON UPDATE CASCADE,
local_id text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
oa_original_id text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
oa_name text NOT NULL,
oa_acronym text,
oa_country text,
@ -389,6 +389,7 @@ CREATE TABLE oa_duplicates (
oa_collectedfrom text,
reltype text NOT NULL DEFAULT 'suggested',
creation_date timestamp DEFAULT NOW(),
created_by text,
modification_date timestamp,
modified_by text,
PRIMARY KEY (local_id, oa_original_id)
@ -419,11 +420,12 @@ GROUP BY
d.reltype;
CREATE TABLE oa_conflicts (
id1 text REFERENCES organizations(id) ON UPDATE CASCADE,
id2 text REFERENCES organizations(id) ON UPDATE CASCADE,
id1 text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
id2 text REFERENCES organizations(id) ON UPDATE CASCADE ON DELETE CASCADE,
reltype text NOT NULL DEFAULT 'suggested',
idgroup text,
creation_date timestamp DEFAULT NOW(),
created_by text,
modification_date timestamp,
modified_by text,
PRIMARY KEY (id1, id2)
@ -513,9 +515,9 @@ CREATE VIEW suggestions_info_by_country_view AS SELECT c.val AS country,
coalesce(t2.n_conflicts, 0) AS n_conflicts,
coalesce(t3.n_pending_orgs, 0) AS n_pending_orgs
FROM countries c
LEFT OUTER JOIN (SELECT o.country AS country, count(DISTINCT d.*) AS n_duplicates FROM oa_duplicates d LEFT OUTER JOIN organizations o ON (d.local_id = o.id) WHERE d.reltype = 'suggested' GROUP BY o.country) AS t1 ON (t1.country = c.val)
LEFT OUTER JOIN (SELECT o.country AS country, count(DISTINCT c.idgroup) AS n_conflicts FROM oa_conflicts c LEFT OUTER JOIN organizations o ON (c.id1 = o.id) WHERE c.reltype = 'suggested' GROUP BY o.country) AS t2 ON (t2.country = c.val)
LEFT OUTER JOIN (SELECT o.country AS country, count(DISTINCT o.id) AS n_pending_orgs FROM organizations o WHERE o.status = 'pending' GROUP BY o.country) AS t3 ON (t3.country = c.val);
LEFT OUTER JOIN (SELECT o.country AS country, count(DISTINCT d.*) AS n_duplicates FROM oa_duplicates d LEFT OUTER JOIN organizations o ON (d.local_id = o.id) WHERE d.reltype = 'suggested' AND o.status = 'approved' GROUP BY o.country) AS t1 ON (t1.country = c.val)
LEFT OUTER JOIN (SELECT o.country AS country, count(DISTINCT c.idgroup) AS n_conflicts FROM oa_conflicts c LEFT OUTER JOIN organizations o ON (c.id1 = o.id) WHERE c.reltype = 'suggested' AND o.status = 'approved' GROUP BY o.country) AS t2 ON (t2.country = c.val)
LEFT OUTER JOIN (SELECT o.country AS country, count(DISTINCT o.id) AS n_pending_orgs FROM organizations o WHERE o.status = 'suggested' GROUP BY o.country) AS t3 ON (t3.country = c.val);
CREATE VIEW conflict_groups_view AS SELECT
c.idgroup AS idgroup,
@ -546,7 +548,7 @@ FROM
oa_duplicates d
LEFT OUTER JOIN organizations o ON (o.id = d.local_id)
WHERE
d.reltype = 'suggested'
d.reltype = 'suggested' AND o.status = 'approved'
GROUP BY o.id, o.name, o.city, o.country
ORDER BY o.name;

View File

@ -11,8 +11,8 @@
<org-tabs-menu org-id="{{orgId}}" info="info" org="org" events="events" selected="currentTab"></org-tabs-menu>
<org-form-metadata org-id="{{orgId}}" org="org" vocabularies="vocabularies" info-method="getInfo()" ng-if="currentTab == 1 && org.status == 'approved'" mode="update"></org-form-metadata>
<org-form-metadata org-id="{{orgId}}" org="org" vocabularies="vocabularies" info-method="getInfo()" ng-if="currentTab == 1 && org.status == 'pending'" mode="approve"></org-form-metadata>
<org-form-metadata org-id="{{orgId}}" org="org" vocabularies="vocabularies" info-method="getInfo()" ng-if="currentTab == 1 && org.status != 'approved' && org.status != 'pending'" mode="readonly"></org-form-metadata>
<org-form-metadata org-id="{{orgId}}" org="org" vocabularies="vocabularies" info-method="getInfo()" ng-if="currentTab == 1 && org.status == 'suggested'" mode="approve"></org-form-metadata>
<org-form-metadata org-id="{{orgId}}" org="org" vocabularies="vocabularies" info-method="getInfo()" ng-if="currentTab == 1 && org.status != 'approved' && org.status != 'suggested'" mode="readonly"></org-form-metadata>
<org-dedup-events org-id="{{orgId}}" events="events" vocabularies="vocabularies" info-method="getInfo()" ng-if="currentTab == 2 && (org.status == 'approved' || org.status == 'pending')"></org-dedup-events>
<org-dedup-events org-id="{{orgId}}" events="events" vocabularies="vocabularies" info-method="getInfo()" ng-if="currentTab == 2 && (org.status == 'approved' || org.status == 'suggested')"></org-dedup-events>
</div>

View File

@ -26,8 +26,8 @@
<span ng-if="!e.values.approved || e.values.approved == 0">-</span>
</td>
<td class="text-right">
<a href="#!{{resultsBasePath}}/0/50/pending/{{e.code}}" ng-if="e.values.pending && e.values.pending > 0">{{e.values.pending}}</a>
<span ng-if="!e.values.pending || e.values.pending == 0">-</span>
<a href="#!{{resultsBasePath}}/0/50/pending/{{e.code}}" ng-if="e.values.suggested && e.values.suggested > 0">{{e.values.suggested}}</a>
<span ng-if="!e.values.suggested || e.values.suggested == 0">-</span>
</td>
<td class="text-right">
<a href="#!{{resultsBasePath}}/0/50/deleted/{{e.code}}" ng-if="e.values.deleted && e.values.deleted > 0">{{e.values.deleted}}</a>

View File

@ -558,7 +558,7 @@ orgsModule.controller('pendingOrgsCtrl', function ($scope, $http, $routeParams,
$scope.orgs = [];
if ($scope.country != '_') {
$http.get('api/organizations/byCountry/pending/' + $scope.country).then(function successCallback(res) {
$http.get('api/organizations/byCountry/suggested/' + $scope.country).then(function successCallback(res) {
if((typeof res.data) == 'string') { alert("Session expired !"); location.reload(true); }
$scope.orgs = res.data;
}, function errorCallback(res) {