For collection-type records only, the ES _id must be in numeric format; for now we prepend 10000 to the originalId value

This commit is contained in:
Enrico Ottonello 2020-07-13 11:03:21 +02:00
parent 6a804f176b
commit d6aafdf97d
4 changed files with 67 additions and 30 deletions

View File

@ -58,6 +58,9 @@ public class BulkUpload {
AriadneCatalogEntry ace = ((AriadneCatalogEntry) next); AriadneCatalogEntry ace = ((AriadneCatalogEntry) next);
if (isCollection) { if (isCollection) {
ace.setResourceType("collection"); ace.setResourceType("collection");
if (ace.getSpatial()==null) {
ace.setSpatial(Arrays.asList(new Spatial()));
}
} }
else { else {
ace.setResourceType("dataset"); ace.setResourceType("dataset");
@ -72,35 +75,40 @@ public class BulkUpload {
ace.setSpatial(Arrays.asList(esSpatial)); ace.setSpatial(Arrays.asList(esSpatial));
} }
String uniqueIsPartOf = ace.getUniqueIsPartOf(); if (!isCollection) {
if (uniqueIsPartOf!=null) { String uniqueIsPartOf = ace.getUniqueIsPartOf();
ace.setIsPartOf(Arrays.asList(uniqueIsPartOf)); if (uniqueIsPartOf != null) {
ace.setIsPartOf(Arrays.asList(uniqueIsPartOf));
}
if (ace.getContributor() != null) {
ace.getContributor().clear();
ace.setContributor(ace.getCreator());
}
Distribution distribution = new Distribution();
AgentInfo distrPublisher = new AgentInfo();
distrPublisher.setEmail("");
distrPublisher.setName("");
distrPublisher.setType("");
distribution.setPublisher(Arrays.asList(distrPublisher));
ace.setDistribution(Arrays.asList(distribution));
ItemMetadataStructure ims = new ItemMetadataStructure();
ace.setHasItemMetadataStructure(Arrays.asList(ims));
MetadataRecord mr = new MetadataRecord();
Dex dex = new Dex();
mr.setConformsTo(Arrays.asList(dex));
ace.setHasMetadataRecord(Arrays.asList(mr));
if (!isCollection) {
ace.setKeyword(Arrays.asList(new String("")));
}
AgentInfo sr = new AgentInfo();
ace.setScientificResponsible(Arrays.asList(sr));
AgentInfo tr = new AgentInfo();
ace.setTechnicalResponsible(Arrays.asList(tr));
} }
if (ace.getContributor()!=null) {
ace.getContributor().clear();
ace.setContributor(ace.getCreator());
}
Distribution distribution = new Distribution();
AgentInfo distrPublisher = new AgentInfo();
distrPublisher.setEmail("");
distrPublisher.setName("");
distrPublisher.setType("");
distribution.setPublisher(Arrays.asList(distrPublisher));
ace.setDistribution(Arrays.asList(distribution));
ItemMetadataStructure ims = new ItemMetadataStructure();
ace.setHasItemMetadataStructure(Arrays.asList(ims));
MetadataRecord mr = new MetadataRecord();
Dex dex = new Dex();
mr.setConformsTo(Arrays.asList(dex));
ace.setHasMetadataRecord(Arrays.asList(mr));
ace.setKeyword(Arrays.asList(new String("")));
AgentInfo sr = new AgentInfo();
ace.setScientificResponsible(Arrays.asList(sr));
AgentInfo tr = new AgentInfo();
ace.setTechnicalResponsible(Arrays.asList(tr));
AgentInfo testPublisher = new AgentInfo(); AgentInfo testPublisher = new AgentInfo();
testPublisher.setName("TEST"); testPublisher.setName("TEST");
testPublisher.setPhone(null);
testPublisher.setEmail(null);
ace.getPublisher().add(testPublisher); ace.getPublisher().add(testPublisher);
String[] splits = ace.getIdentifier().split("/"); String[] splits = ace.getIdentifier().split("/");
@ -113,8 +121,17 @@ public class BulkUpload {
String aatSubjectId = aatSourceSplit[aatSourceSplit.length-1]; String aatSubjectId = aatSourceSplit[aatSourceSplit.length-1];
ace.getAatSubjects().forEach(s -> s.setId(aatSubjectId)); ace.getAatSubjects().forEach(s -> s.setId(aatSubjectId));
} }
request.add(new IndexRequest(elasticSearchIndexName).id(splits[splits.length-1])
String idES;
if (isCollection) {
idES = "10000".concat(ace.getOriginalId());
}
else {
idES = splits[splits.length-1];
}
request.add(new IndexRequest(elasticSearchIndexName).id(idES)
.source(ace.toJson(),XContentType.JSON)); .source(ace.toJson(),XContentType.JSON));
BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT); BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
log.info("Indexing to ES completed with status: "+bulkResponse.status()); log.info("Indexing to ES completed with status: "+bulkResponse.status());
if (bulkResponse.hasFailures()) { if (bulkResponse.hasFailures()) {

View File

@ -373,4 +373,12 @@ public class AriadneCatalogEntry {
public String toJson(){ public String toJson(){
return new Gson().toJson(this); return new Gson().toJson(this);
} }
/**
 * Sets the accrual periodicity stored on this catalog entry.
 *
 * @param accrualPeriodicity the value assigned to the {@code accrualPeriodicity} field
 */
public void setAccrualPeriodicity(String accrualPeriodicity) {
this.accrualPeriodicity = accrualPeriodicity;
}
/**
 * Sets the audience stored on this catalog entry.
 *
 * @param audience the value assigned to the {@code audience} field
 */
public void setAudience(String audience) {
this.audience = audience;
}
} }

View File

@ -2,13 +2,25 @@ package eu.dnetlib.ariadneplus.reader.utils;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.ChronoField;
public class ESUtils { public class ESUtils {
private static DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy"); private static DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy");
private static DateTimeFormatter elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); private static DateTimeFormatter elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
// Formatter for year-only input (pattern "yyyy"). Month and day are defaulted to 1
// when parsing so that LocalDate.parse can resolve a complete date from a bare year.
private static DateTimeFormatter yearOnlyDateFormatter = new DateTimeFormatterBuilder()
.appendPattern("yyyy")
.parseDefaulting(ChronoField.MONTH_OF_YEAR, 1)
.parseDefaulting(ChronoField.DAY_OF_MONTH, 1)
.toFormatter();
public static String getESFormatDate(String originalDate) { public static String getESFormatDate(String originalDate) {
LocalDate parsedDate = LocalDate.parse(originalDate, originalRecordDateFormatter); try {
return parsedDate.format(elasticSearchDateFormatter); LocalDate parsedDate = LocalDate.parse(originalDate, originalRecordDateFormatter);
return parsedDate.format(elasticSearchDateFormatter);
} catch (Exception e) {
LocalDate parsedDate = LocalDate.parse(originalDate, yearOnlyDateFormatter);
return parsedDate.format(yearOnlyDateFormatter);
}
} }
} }

View File

@ -62,7 +62,7 @@ public class GraphDbReaderAndESIndexTest {
final ClassPathResource queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql"); final ClassPathResource queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql");
String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name()); String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name());
runSPQRLQuery.executeMultipleQueryGraph(queryTemplate, recordIds, datasource, collectionId, false); runSPQRLQuery.executeMultipleQueryGraph(queryTemplate, recordIds, datasource, collectionId, false);
// runSPQRLQuery.executeMultipleQueryGraph(queryTemplate, recordIds, datasource, collectionId, true);
} }
@Test @Test