duplicates addition

This commit is contained in:
Michele Artini 2021-04-20 11:43:17 +02:00
parent e71bb3657a
commit 5f1a46cc51
9 changed files with 29 additions and 311 deletions

View File

@ -29,7 +29,6 @@ import eu.dnetlib.common.controller.AbstractDnetController;
import eu.dnetlib.organizations.model.JournalEntry;
import eu.dnetlib.organizations.model.Note;
import eu.dnetlib.organizations.model.OpenaireDuplicate;
import eu.dnetlib.organizations.model.OrganizationBase;
import eu.dnetlib.organizations.model.utils.BrowseEntry;
import eu.dnetlib.organizations.model.utils.OrganizationConflict;
import eu.dnetlib.organizations.model.view.ConflictGroupView;
@ -43,7 +42,6 @@ import eu.dnetlib.organizations.repository.NoteRepository;
import eu.dnetlib.organizations.repository.UserCountryRepository;
import eu.dnetlib.organizations.repository.readonly.ConflictGroupViewRepository;
import eu.dnetlib.organizations.repository.readonly.DuplicateGroupViewRepository;
import eu.dnetlib.organizations.repository.readonly.DuplicateSearchViewRepository;
import eu.dnetlib.organizations.repository.readonly.OpenaireDuplicateViewRepository;
import eu.dnetlib.organizations.repository.readonly.OrganizationInfoViewRepository;
import eu.dnetlib.organizations.repository.readonly.OrganizationSimpleViewRepository;
@ -65,8 +63,6 @@ public class OrganizationController extends AbstractDnetController {
@Autowired
private OpenaireDuplicateViewRepository openaireDuplicateViewRepository;
@Autowired
private DuplicateSearchViewRepository duplicateSearchViewRepository;
@Autowired
private ConflictGroupViewRepository conflictGroupViewRepository;
@Autowired
private SuggestionInfoViewByCountryRepository suggestionInfoViewByCountryRepository;
@ -221,32 +217,25 @@ public class OrganizationController extends AbstractDnetController {
}
@GetMapping("/search/{page}/{size}")
public Page<? extends OrganizationBase> search(@PathVariable final int page,
public Page<OrganizationSimpleView> search(@PathVariable final int page,
@PathVariable final int size,
@RequestParam final String q,
@RequestParam(required = false, defaultValue = "") final String status,
final Authentication authentication) {
if (StringUtils.isNotBlank(status) && status.equals("search_duplicate")) {
return UserInfo.isSuperAdmin(authentication)
? duplicateSearchViewRepository.search(q, PageRequest.of(page, size))
: duplicateSearchViewRepository.searchForUser(q, UserInfo.getEmail(authentication), PageRequest.of(page, size));
final List<String> statuses;
if (StringUtils.isNotBlank(status)) {
statuses = Arrays.asList(status.split(","));
} else if (UserInfo.isSimpleUser(authentication)) {
statuses = Arrays.asList(OrganizationStatus.approved.toString());
} else {
final List<String> statuses;
if (StringUtils.isNotBlank(status)) {
statuses = Arrays.asList(status.split(","));
} else if (UserInfo.isSimpleUser(authentication)) {
statuses = Arrays.asList(OrganizationStatus.approved.toString());
} else {
statuses = Arrays.asList(OrganizationStatus.approved.toString(), OrganizationStatus.suggested.toString());
}
return UserInfo.isSuperAdmin(authentication)
? organizationSimpleViewRepository.search(q, statuses, PageRequest.of(page, size))
: organizationSimpleViewRepository.searchForUser(q, UserInfo.getEmail(authentication), statuses, PageRequest.of(page, size));
statuses = Arrays.asList(OrganizationStatus.approved.toString(), OrganizationStatus.suggested.toString());
}
return UserInfo.isSuperAdmin(authentication)
? organizationSimpleViewRepository.search(q, statuses, PageRequest.of(page, size))
: organizationSimpleViewRepository.searchForUser(q, UserInfo.getEmail(authentication), statuses, PageRequest.of(page, size));
}
@GetMapping("/byCountry/{status}/{code}/{page}/{size}")

View File

@ -1,37 +0,0 @@
package eu.dnetlib.organizations.model;
/**
 * Common accessor contract for organization records: identifier, name, type,
 * city, country, status, plus the acronym and URL arrays.
 *
 * Every property is exposed as a getter/setter pair, so implementations are
 * mutable beans.
 */
public interface OrganizationBase {
/** @return the organization identifier */
String getId();
/** @param id the organization identifier */
void setId(final String id);
/** @return the organization name */
String getName();
/** @param name the organization name */
void setName(final String name);
/** @return the organization type */
String getType();
/** @param type the organization type */
void setType(final String type);
/** @return the city of the organization */
String getCity();
/** @param city the city of the organization */
void setCity(final String city);
/** @return the country of the organization */
String getCountry();
/** @param country the country of the organization */
void setCountry(final String country);
/** @return the acronyms of the organization */
String[] getAcronyms();
/** @param acronyms the acronyms of the organization */
void setAcronyms(final String[] acronyms);
/** @return the status of the organization, as a string */
String getStatus();
/** @param status the status of the organization, as a string */
void setStatus(final String status);
/** @return the URLs of the organization */
String[] getUrls();
/** @param urls the URLs of the organization */
void setUrls(final String[] urls);
}

View File

@ -1,158 +0,0 @@
package eu.dnetlib.organizations.model.view;
import java.io.Serializable;
import java.util.Objects;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Table;
import org.hibernate.annotations.Type;
import org.hibernate.annotations.TypeDef;
import org.hibernate.annotations.TypeDefs;
import com.vladmihalcea.hibernate.type.array.StringArrayType;
import eu.dnetlib.organizations.model.OrganizationBase;
@Entity
@Table(name = "duplicate_search_view")
@TypeDefs({
@TypeDef(name = "string-array", typeClass = StringArrayType.class)
})
public class DuplicateSearchView implements OrganizationBase, Serializable, Comparable<DuplicateSearchView> {
/**
*
*/
private static final long serialVersionUID = -2790140170438505458L;
@Id
@Column(name = "id")
private String id;
@Column(name = "name")
private String name;
@Column(name = "type")
private String type;
@Column(name = "city")
private String city;
@Column(name = "country")
private String country;
@Type(type = "string-array")
@Column(name = "acronyms", columnDefinition = "text[]")
private String[] acronyms;
@Type(type = "string-array")
@Column(name = "urls", columnDefinition = "text[]")
private String[] urls;
@Column(name = "status")
private String status;
public DuplicateSearchView() {}
public DuplicateSearchView(final String id) {
this.id = id;
}
public DuplicateSearchView(final String id, final String name, final String type, final String city, final String country, final String[] acronyms,
final String status) {
this.id = id;
this.name = name;
this.type = type;
this.city = city;
this.country = country;
this.acronyms = acronyms;
this.status = status;
}
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(final String name) {
this.name = name;
}
public String getType() {
return type;
}
public void setType(final String type) {
this.type = type;
}
public String getCity() {
return city;
}
public void setCity(final String city) {
this.city = city;
}
public String getCountry() {
return country;
}
public void setCountry(final String country) {
this.country = country;
}
public String[] getAcronyms() {
return acronyms;
}
public void setAcronyms(final String[] acronyms) {
this.acronyms = acronyms;
}
public String getStatus() {
return status;
}
public void setStatus(final String status) {
this.status = status;
}
public String[] getUrls() {
return urls;
}
public void setUrls(final String[] urls) {
this.urls = urls;
}
@Override
public int hashCode() {
return Objects.hash(id);
}
@Override
public boolean equals(final Object obj) {
if (this == obj) { return true; }
if (obj == null) { return false; }
if (!(obj instanceof DuplicateSearchView)) { return false; }
final DuplicateSearchView other = (DuplicateSearchView) obj;
return Objects.equals(id, other.id);
}
@Override
public int compareTo(final DuplicateSearchView o) {
return id.compareTo(o.getId());
}
}

View File

@ -14,14 +14,12 @@ import org.hibernate.annotations.TypeDefs;
import com.vladmihalcea.hibernate.type.array.StringArrayType;
import eu.dnetlib.organizations.model.OrganizationBase;
@Entity
@Table(name = "organizations_simple_view")
@TypeDefs({
@TypeDef(name = "string-array", typeClass = StringArrayType.class)
})
public class OrganizationSimpleView implements OrganizationBase, Serializable, Comparable<OrganizationSimpleView> {
public class OrganizationSimpleView implements Serializable, Comparable<OrganizationSimpleView> {
/**
*
@ -72,82 +70,66 @@ public class OrganizationSimpleView implements OrganizationBase, Serializable, C
this.status = status;
}
@Override
public String getId() {
return id;
}
@Override
public void setId(final String id) {
this.id = id;
}
@Override
public String getName() {
return name;
}
@Override
public void setName(final String name) {
this.name = name;
}
@Override
public String getType() {
return type;
}
@Override
public void setType(final String type) {
this.type = type;
}
@Override
public String getCity() {
return city;
}
@Override
public void setCity(final String city) {
this.city = city;
}
@Override
public String getCountry() {
return country;
}
@Override
public void setCountry(final String country) {
this.country = country;
}
@Override
public String[] getAcronyms() {
return acronyms;
}
@Override
public void setAcronyms(final String[] acronyms) {
this.acronyms = acronyms;
}
@Override
public String getStatus() {
return status;
}
@Override
public void setStatus(final String status) {
this.status = status;
}
@Override
public String[] getUrls() {
return urls;
}
@Override
public void setUrls(final String[] urls) {
this.urls = urls;
}

View File

@ -1,22 +0,0 @@
package eu.dnetlib.organizations.repository.readonly;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import eu.dnetlib.organizations.model.view.DuplicateSearchView;
/**
 * Repository exposing paged full-text searches over {@code duplicate_search_view}.
 *
 * Both queries are native SQL: they join the view with {@code org_index_search}
 * on the organization id, keep the rows whose indexed text matches
 * {@code plainto_tsquery(:text)}, and order the result by organization name.
 */
@Repository
public interface DuplicateSearchViewRepository extends ReadOnlyRepository<DuplicateSearchView, String> {
/**
 * Searches all rows of the view.
 *
 * @param text     free text passed to {@code plainto_tsquery}
 * @param pageable the requested page
 * @return the matching page, ordered by name
 */
@Query(value = "select o.* from duplicate_search_view o left outer join org_index_search idx on (idx.id = o.id) where idx.txt @@ plainto_tsquery(:text) order by o.name", nativeQuery = true)
Page<DuplicateSearchView> search(@Param("text") String text, Pageable pageable);
/**
 * Same search, restricted to rows whose country has a {@code user_countries}
 * entry for the given email.
 *
 * @param text     free text passed to {@code plainto_tsquery}
 * @param email    email matched against {@code user_countries.email}
 * @param pageable the requested page
 * @return the matching page, ordered by name
 */
@Query(value = "select o.* from duplicate_search_view o left outer join org_index_search idx on (idx.id = o.id) left outer join user_countries uc on (uc.country = o.country) where idx.txt @@ plainto_tsquery(:text) and uc.email = :email order by o.name", nativeQuery = true)
Page<DuplicateSearchView> searchForUser(@Param("text") String text, @Param("email") String email, Pageable pageable);
}

View File

@ -210,38 +210,20 @@ public class DatabaseUtils {
public void saveDuplicates(final List<OpenaireDuplicate> simrels, final String user) {
final OffsetDateTime now = OffsetDateTime.now();
final List<OpenaireDuplicate> list = openaireDuplicateRepository.saveAll(simrels);
final List<OpenaireDuplicate> list = openaireDuplicateRepository.saveAll(simrels.stream()
.filter(d -> !d.getOaOriginalId().startsWith(OpenOrgsConstants.OPENORGS_PENDING_PREFIX))
.collect(Collectors.toList()));
simrels.stream()
.map(OpenaireDuplicate::getOaOriginalId)
.filter(id -> id.startsWith(OpenOrgsConstants.OPENORGS_PENDING_PREFIX))
.forEach(organizationRepository::deleteById);
list.forEach(d -> {
openaireDuplicateRepository.updateCreatedByIfMissing(d.getLocalId(), d.getOaOriginalId(), user);
openaireDuplicateRepository.updateModificationDate(d.getLocalId(), d.getOaOriginalId(), user, now);
});
for (final OpenaireDuplicate curr : list) {
if (curr.getRelType().equals(SimilarityType.is_similar.toString())) {
openaireDuplicateRepository.findByOaOriginalId(curr.getOaOriginalId())
.stream()
.filter(d -> !d.getLocalId().equals(curr.getLocalId()))
.forEach(d -> {
if (d.getLocalId().startsWith(OpenOrgsConstants.OPENORGS_PENDING_PREFIX)) {
log.info("Removing useless pending org: " + d.getLocalId());
organizationRepository.deleteById(d.getLocalId());
} else if (d.getRelType().equals(SimilarityType.suggested.toString())) {
log.info("Removing invalid suggestion: " + d);
openaireDuplicateRepository.delete(d);
} else if (d.getRelType().equals(SimilarityType.is_similar.toString())) {
log.warn("***");
log.warn("More is_similar relations");
log.warn("rel1 ->" + d);
log.warn("rel1 ->" + curr);
log.warn("***");
} else {
// is_different: nothing todo
}
});
}
}
final String message = String.format("Duplicates updated (%s similars, %s differents, %s suggested)", list.stream()
.filter(d -> d.getRelType().equals(SimilarityType.is_similar.toString()))
.count(), list.stream()

View File

@ -13,6 +13,12 @@ UPDATE tmp_dedup_events SET local_id = oa_original_id WHERE local_id = '' OR lo
UPDATE tmp_dedup_events SET oa_country = 'UNKNOWN' WHERE oa_country = '' OR oa_country IS NULL;
UPDATE tmp_dedup_events SET oa_name = oa_acronym WHERE oa_name = '' OR oa_name IS NULL;
DELETE FROM tmp_dedup_events WHERE oa_name = '' OR oa_name IS NULL;
-- delete invalid relations (a raw org can not be suggested to multiple orgs):
-- for every oa_original_id that occurs more than once, drop the rows whose
-- local_id does not carry the literal 'openorgs____::' prefix.
-- The underscores are escaped because a bare '_' in LIKE matches any single
-- character; this matches the escaping used by the other statements in this script.
DELETE FROM tmp_dedup_events WHERE oa_original_id IN (
	SELECT oa_original_id
	FROM tmp_dedup_events
	GROUP BY oa_original_id HAVING count(*) > 1)
AND local_id NOT LIKE 'openorgs\_\_\_\_::%';
-- IMPORT MISSING TERMS
INSERT INTO id_types(val, name) SELECT distinct arr[2], arr[2] FROM (SELECT string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr FROM tmp_dedup_events WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%') as c ON CONFLICT DO NOTHING;

View File

@ -572,30 +572,6 @@ GROUP BY
org.country,
org.status;
-- duplicate_search_view: one row per organization in status 'duplicate', with its
-- distinct non-null acronyms and urls aggregated into arrays. An organization is
-- listed only when none of the oa_duplicates rows that reference it as
-- oa_original_id has reltype 'is_similar'.
CREATE VIEW duplicate_search_view AS SELECT
org.id,
org.name,
org.type,
org.city,
org.country,
org.status,
array_remove(array_agg(DISTINCT a.acronym), NULL::text) AS acronyms,
array_remove(array_agg(DISTINCT u.url), NULL::text) AS urls
FROM organizations org
LEFT JOIN acronyms a ON org.id = a.id
LEFT JOIN urls u ON org.id = u.id
LEFT OUTER JOIN oa_duplicates d ON (org.id = d.oa_original_id)
WHERE
org.status = 'duplicate'
GROUP BY
org.id,
org.name,
org.type,
org.city,
org.country,
org.status
-- NOTE(review): when an organization has no oa_duplicates row, array_agg(d.reltype)
-- is {NULL}, the ANY comparison evaluates to NULL and the row is filtered out too.
-- Confirm that excluding such organizations is intended.
HAVING not('is_similar' = ANY(array_agg(d.reltype)));
CREATE VIEW users_view AS SELECT
u.email,
u.valid,

View File

@ -14,7 +14,7 @@
</tr>
</thead>
<tbody>
<tr class="d-flex" ng-repeat="sr in duplicates">
<tr class="d-flex" ng-repeat="sr in duplicates" ng-hide="sr.oaOriginalId.startsWith('pending_org_::')">
<td class="col-4 pl-3">{{sr.oaName}}
<span class="small" ng-repeat="oid in sr.otherIdentifiers">
<br /><b>PID ({{oid.type}}): </b>{{oid.id}}
@ -68,4 +68,4 @@
</div>
<select-org-modal modal-id="addDuplicateModal" selected-org="newDuplicate" filter-status="search_duplicate" on-select="addDuplicate()"></select-org-modal>
<select-org-modal modal-id="addDuplicateModal" selected-org="newDuplicate" filter-status="suggested" on-select="addDuplicate()"></select-org-modal>