Refactoring license migration

This commit is contained in:
Thomas Georgios Giannos 2023-12-19 15:57:13 +02:00
parent f09e031e0c
commit f9a47f2de7
6 changed files with 144 additions and 56 deletions

View File

@ -12,6 +12,7 @@ public class DatasetCriteria extends Criteria {
private String like;
private List<UUID> datasetTemplates;
private Short status;
private List<UUID> ids;
private List<UUID> dmps;
private List<UUID> groupIds;
private List<UUID> grants;
@ -50,6 +51,14 @@ public class DatasetCriteria extends Criteria {
this.status = status;
}
/**
 * Returns the dataset ids held by this criteria.
 *
 * @return the list previously passed to {@link #setIds(List)}, or {@code null} if none was set
 */
public List<UUID> getIds() {
return ids;
}
/**
 * Stores the dataset ids for this criteria. The list reference is kept as-is (no copy is made).
 *
 * @param ids the dataset ids; may be {@code null}
 */
public void setIds(List<UUID> ids) {
this.ids = ids;
}
public List<UUID> getDmps() {
return dmps;
}

View File

@ -1,5 +1,6 @@
package eu.old.eudat.elastic.repository;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.old.eudat.elastic.criteria.DatasetCriteria;
import eu.old.eudat.elastic.entities.Dataset;
import eu.old.eudat.elastic.entities.Dmp;
@ -13,10 +14,8 @@ import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.InnerHitBuilder;
import org.elasticsearch.index.query.NestedQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.*;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregationBuilder;
@ -78,6 +77,24 @@ public class DatasetRepository extends ElasticRepository<Dataset, DatasetCriteri
return null;
}
/**
 * Looks up the nested {@code datasets} entries whose {@code datasets.id.keyword} is one of the
 * given ids and converts every matching inner hit to a {@link Dataset}.
 *
 * @param ids the dataset ids to look up, as strings
 * @return the datasets found; an empty list when no client is configured or when {@code ids}
 *         is {@code null} or empty (never {@code null}, so callers can stream the result directly)
 * @throws IOException if the search request fails
 */
public List<Dataset> findByIds(List<String> ids) throws IOException {
    if (this.getClient() == null || ids == null || ids.isEmpty()) {
        return List.of();
    }

    SearchRequest searchRequest = new SearchRequest(this.environment.getProperty("elasticsearch.index"));
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    BoolQueryBuilder boolQuery = QueryBuilders.boolQuery().should(QueryBuilders.termsQuery("datasets.id.keyword", ids));

    // Elasticsearch returns only 3 inner hits per top-level hit unless a size is given, which
    // would silently drop datasets when one parent document holds more than three matches.
    // Use the same configured size as the other nested dataset query in this repository.
    InnerHitBuilder innerHitBuilder = new InnerHitBuilder();
    Integer innerHitsSize = this.environment.getProperty("elasticsearch.innerHitsSize", Integer.class);
    if (innerHitsSize != null) {
        innerHitBuilder.setSize(innerHitsSize);
    }

    NestedQueryBuilder nestedQueryBuilder = QueryBuilders.nestedQuery("datasets", boolQuery, ScoreMode.Avg).innerHit(innerHitBuilder);
    searchSourceBuilder.query(nestedQueryBuilder);
    // NOTE(review): 100000 exceeds Elasticsearch's default index.max_result_window (10000);
    // confirm the index is configured to allow this, otherwise the search is rejected.
    searchSourceBuilder.size(100000);
    searchRequest.source(searchSourceBuilder);

    SearchResponse response = this.getClient().search(searchRequest, RequestOptions.DEFAULT);
    return Arrays.stream(response.getHits().getHits())
            .map(hit -> hit.getInnerHits().values()).flatMap(Collection::stream)
            .map(SearchHits::getHits).flatMap(Arrays::stream)
            .map(x -> new Dataset().fromElasticEntity(this.transformFromString(x.getSourceAsString(), Map.class))).toList();
}
@Override
public Dataset findDocument(String id) throws IOException {
if (this.getClient() != null) {
@ -205,7 +222,7 @@ public class DatasetRepository extends ElasticRepository<Dataset, DatasetCriteri
}
NestedQueryBuilder nestedQueryBuilder = QueryBuilders.nestedQuery("datasets", boolQuery, ScoreMode.None).innerHit(new InnerHitBuilder().setFetchSourceContext(new FetchSourceContext(true, new String[]{"datasets.id"}, null)).setSize(this.environment.getProperty("elasticsearch.innerHitsSize", Integer.class)));
searchSourceBuilder.query(nestedQueryBuilder)/*.from(criteria.getOffset()).size(criteria.getSize())*/.fetchSource("datasets.id", null);
searchSourceBuilder.query(nestedQueryBuilder)/*.from(criteria.getOffset()).size(criteria.getSize())*/;
sortBuilders.forEach(searchSourceBuilder::sort);
searchRequest.source(searchSourceBuilder);
SearchResponse response = this.getClient().search(searchRequest, RequestOptions.DEFAULT);
@ -267,6 +284,11 @@ public class DatasetRepository extends ElasticRepository<Dataset, DatasetCriteri
boolQuery = boolQuery.should(QueryBuilders.termQuery("datasets.status", criteria.getStatus().toString()));
}
if (criteria.getIds() != null && criteria.getIds().size() > 0) {
criteria.setIds(criteria.getIds().stream().filter(Objects::nonNull).collect(Collectors.toList()));
boolQuery = boolQuery.should(QueryBuilders.termsQuery("datasets.id.keyword", criteria.getIds().stream().map(UUID::toString).collect(Collectors.toList())));
}
if (criteria.getDmps() != null && criteria.getDmps().size() > 0) {
criteria.setDmps(criteria.getDmps().stream().filter(Objects::nonNull).collect(Collectors.toList()));
boolQuery = boolQuery.should(QueryBuilders.termsQuery("datasets.dmp", criteria.getDmps().stream().map(UUID::toString).collect(Collectors.toList())));

View File

@ -13,7 +13,7 @@ import java.io.IOException;
/**
* Created by ikalyvas on 7/5/2018.
*/
public abstract class ElasticRepository<T extends ElasticEntity,C extends Criteria> implements Repository<T,C> {
public abstract class ElasticRepository<T extends ElasticEntity<?>,C extends Criteria> implements Repository<T,C> {
private static final Logger logger = LoggerFactory.getLogger(ElasticRepository.class);
private RestHighLevelClient client;

View File

@ -118,53 +118,6 @@ public class DmpMigrationService {
contactEntity.setUserId((String) model.getExtraProperties().get("contact"));
dmpProperties.getContacts().add(contactEntity);
}
if (model.getExtraProperties().containsKey("license") && model.getExtraProperties().get("license") != null) {
Object license = model.getExtraProperties().get("license");
HashMap<String, String> licenseMap = jsonHandlingService.mapFromJson(jsonHandlingService.toJson(license));
ReferenceEntity referenceEntity = new ReferenceEntity();
if (licenseMap.containsKey("pid")) {
referenceEntity.setReference(licenseMap.get("pid"));
}
ReferenceQuery referenceQuery = queryFactory.query(ReferenceQuery.class)
.references(referenceEntity.getReference())
.types(ReferenceType.Licenses)
.isActive(IsActive.Active);
List<ReferenceEntity> foundReferences = referenceQuery.collect();
boolean licenseExists = foundReferences != null && !foundReferences.isEmpty();
if (!licenseExists) {
if (licenseMap.containsKey("name")) {
referenceEntity.setLabel(licenseMap.get("name"));
}
if (licenseMap.containsKey("uri")) {
referenceEntity.setSource(licenseMap.get("uri"));
}
if (licenseMap.containsKey("abbreviation")) {
referenceEntity.setAbbreviation(licenseMap.get("abbreviation"));
}
referenceEntity.setId(UUID.randomUUID());
referenceEntity.setSourceType(ReferenceSourceType.External);
referenceEntity.setType(ReferenceType.Licenses);
referenceEntity.setCreatedAt(Instant.now());
referenceEntity.setUpdatedAt(Instant.now());
referenceEntity.setIsActive(IsActive.Active);
}
DmpReferenceEntity dmpReferenceEntity = new DmpReferenceEntity();
dmpReferenceEntity.setId(UUID.randomUUID());
dmpReferenceEntity.setDmpId(data.getId());
if (licenseExists) {
dmpReferenceEntity.setReferenceId(foundReferences.getFirst().getId());
} else {
dmpReferenceEntity.setReferenceId(referenceEntity.getId());
}
dmpReferenceEntity.setCreatedAt(Instant.now());
dmpReferenceEntity.setUpdatedAt(Instant.now());
dmpReferenceEntity.setIsActive(IsActive.Active);
if (!licenseExists)
entityManager.persist(referenceEntity);
entityManager.persist(dmpReferenceEntity);
}
}
if (model.getProperties() != null) {
model.getProperties().forEach((key,val) -> {
@ -205,4 +158,93 @@ public class DmpMigrationService {
} while (items != null && !items.isEmpty() && !TestMode);
}
/**
 * Migrates the {@code license} entry found in each legacy DMP's extra properties.
 * <p>
 * DMPs are read page by page. For every DMP carrying a license with a non-null {@code pid},
 * a {@link DmpReferenceEntity} linking the DMP to the license reference is persisted. The
 * license {@link ReferenceEntity} itself is created and persisted only the first time its pid
 * is encountered during this run; later DMPs reuse the id of that reference.
 * Licenses without a {@code pid} are skipped.
 *
 * @throws JsonProcessingException if the license object cannot be converted to or from JSON
 */
public void migrateDmpLicenses() throws JsonProcessingException {
    DMPDao dmpDao = databaseRepository.getDmpDao();
    long total = dmpDao.asQueryable().count();
    logger.debug("Migrate Licenses for Dmp Total : {}", total);
    int page = 0;

    // Id of the reference persisted for each license pid seen so far in this run.
    Map<String, UUID> licenseIdByPid = new HashMap<>();

    List<DMP> items;
    do {
        items = dmpDao.asQueryable().orderBy((builder, root) -> builder.asc(root.get("created"))).orderBy((builder, root) -> builder.asc(root.get("ID"))).skip(page * PageSize).take(PageSize).toList();
        if (items != null && !items.isEmpty()) {
            logger.debug("Migrate Licenses for Dmp {} of {}", page * PageSize, total);

            for (DMP item : items) {
                DataManagementPlan model = new DataManagementPlan();
                model.fromDataModel(item);

                if (model.getExtraProperties() != null && model.getExtraProperties().get("license") != null) {
                    Object license = model.getExtraProperties().get("license");
                    HashMap<String, String> licenseMap = jsonHandlingService.mapFromJson(jsonHandlingService.toJson(license));

                    String licensePid = licenseMap.get("pid");
                    if (licensePid == null) {
                        // Without a pid the license cannot be identified or de-duplicated.
                        continue;
                    }

                    UUID referenceId = licenseIdByPid.get(licensePid);
                    if (referenceId == null) {
                        ReferenceEntity referenceEntity = this.buildLicenseReference(licenseMap, licensePid);
                        referenceId = referenceEntity.getId();
                        licenseIdByPid.put(licensePid, referenceId);
                        entityManager.persist(referenceEntity);
                        logger.debug("License '{}' migrated", licensePid);
                    }

                    DmpReferenceEntity dmpReferenceEntity = new DmpReferenceEntity();
                    dmpReferenceEntity.setId(UUID.randomUUID());
                    dmpReferenceEntity.setDmpId(item.getId());
                    dmpReferenceEntity.setReferenceId(referenceId);
                    dmpReferenceEntity.setCreatedAt(Instant.now());
                    dmpReferenceEntity.setUpdatedAt(Instant.now());
                    dmpReferenceEntity.setIsActive(IsActive.Active);
                    entityManager.persist(dmpReferenceEntity);
                }
                this.entityManager.flush();
            }

            page++;
        }
    } while (items != null && !items.isEmpty() && !TestMode);

    logger.info("Dmp licenses migration finished");
}

/**
 * Builds (without persisting) the license reference described by a legacy license map.
 *
 * @param licenseMap the legacy license fields ({@code name}, {@code uri}, {@code abbreviation},
 *                   {@code source}, {@code created}, {@code modified})
 * @param licensePid the non-null license pid, stored as the reference value
 * @return a new active, external reference of type {@link ReferenceType#Licenses} with a fresh id
 */
private ReferenceEntity buildLicenseReference(Map<String, String> licenseMap, String licensePid) {
    ReferenceEntity referenceEntity = new ReferenceEntity();
    referenceEntity.setReference(licensePid);

    if (licenseMap.containsKey("name")) {
        referenceEntity.setLabel(licenseMap.get("name"));
    }
    if (licenseMap.containsKey("uri")) {
        referenceEntity.setSource(licenseMap.get("uri"));
    }
    if (licenseMap.containsKey("abbreviation")) {
        referenceEntity.setAbbreviation(licenseMap.get("abbreviation"));
    }
    if (licenseMap.get("source") != null) {
        logger.debug("License found with source '{}'", licenseMap.get("source"));
    }

    // The legacy "created" value belongs in createdAt. It was previously written to updatedAt,
    // which left createdAt unset and was then overwritten by the "modified" handling below.
    String created = licenseMap.get("created");
    referenceEntity.setCreatedAt(created != null ? Instant.parse(created) : Instant.now());

    String modified = licenseMap.get("modified");
    referenceEntity.setUpdatedAt(modified != null ? Instant.parse(modified) : Instant.now());

    // if (licenseMap.containsKey("tag") && licenseMap.get("tag") != null && !"Open Definition".equals(licenseMap.get("tag"))) {
    //     logger.debug("License found with tag '{}'", licenseMap.get("tag"));
    // }

    referenceEntity.setId(UUID.randomUUID());
    referenceEntity.setSourceType(ReferenceSourceType.External);
    referenceEntity.setType(ReferenceType.Licenses);
    referenceEntity.setIsActive(IsActive.Active);
    return referenceEntity;
}
}

View File

@ -7,6 +7,7 @@ import eu.eudat.data.DescriptionTagEntity;
import eu.eudat.data.TagEntity;
import eu.old.eudat.data.dao.entities.DatasetDao;
import eu.old.eudat.data.entities.Dataset;
import eu.old.eudat.elastic.criteria.DatasetCriteria;
import eu.old.eudat.elastic.entities.Tag;
import eu.old.eudat.elastic.repository.DatasetRepository;
import eu.old.eudat.logic.services.operations.DatabaseRepository;
@ -50,18 +51,24 @@ public class TagMigrationService {
Set<String> savedTagNames = new HashSet<>();
Map<String, UUID> savedTagIdsByName = new HashMap<>();
DatasetCriteria criteria = new DatasetCriteria();
List<Dataset> items;
do {
items = datasetDao.asQueryable().orderBy((builder, root) -> builder.asc(root.get("created"))).orderBy((builder, root) -> builder.asc(root.get("ID"))).skip(page * PageSize).take(PageSize).toList();
if (items != null && !items.isEmpty()) {
logger.debug("Migrate Dataset tags " + page * PageSize + " of " + total);
for (Dataset item : items) {
eu.old.eudat.elastic.entities.Dataset elasticDataset = this.datasetRepository.findDocument(item.getId().toString());
if (elasticDataset == null) {
criteria.setIds(items.stream().map(Dataset::getId).toList());
// List<eu.old.eudat.elastic.entities.Dataset> elasticDatasets = this.datasetRepository.queryIds(criteria);
List<eu.old.eudat.elastic.entities.Dataset> elasticDatasets = this.datasetRepository.findByIds(items.stream().map(x -> x.getId().toString()).toList());
for (Dataset item : items) {
List<eu.old.eudat.elastic.entities.Dataset> found = elasticDatasets.stream().filter(x -> item.getId().toString().equals(x.getId())).toList();
if (found.isEmpty()) {
logger.error("No dataset with id {} found on elastic search. Skipping tag migration for this dataset", item.getId());
continue;
}
eu.old.eudat.elastic.entities.Dataset elasticDataset = found.getFirst();
boolean tagAlreadyExists;
if (elasticDataset.getTags() != null && !elasticDataset.getTags().isEmpty()) {
for(Tag tag : elasticDataset.getTags()) {

View File

@ -87,6 +87,7 @@ public class MigrationController {
this.serviceMigrationService.migrate();
this.dmpMigrationService.migrate();
this.dmpMigrationService.migrateDmpLicenses();
this.dmpDatasetProfileMigrationService.migrate();
this.datasetMigrationService.migrate();
this.tagMigrationService.migrate();
@ -125,6 +126,13 @@ public class MigrationController {
return true;
}
/**
 * Endpoint mapped to GET {@code dmps/licenses} that runs the DMP license migration on its own
 * by delegating to {@code dmpMigrationService.migrateDmpLicenses()}.
 *
 * @return {@code true} once the migration call returns without throwing
 */
@GetMapping("dmps/licenses")
@Transactional
public boolean migrateDmpLicenses() throws JsonProcessingException, NoSuchFieldException, IllegalAccessException {
this.dmpMigrationService.migrateDmpLicenses();
return true;
}
@GetMapping("datasets")
@Transactional
public boolean migrateDatasets() throws IOException, JAXBException, ParserConfigurationException, InstantiationException, IllegalAccessException, SAXException {