2020-01-28 16:53:59 +01:00
|
|
|
package eu.dnetlib.ariadneplus.elasticsearch;
|
|
|
|
|
2020-07-08 10:50:14 +02:00
|
|
|
import eu.dnetlib.ariadneplus.elasticsearch.model.*;
|
2020-06-10 19:39:53 +02:00
|
|
|
import eu.dnetlib.ariadneplus.reader.ResourceManager;
|
2020-06-12 18:14:41 +02:00
|
|
|
import org.apache.commons.logging.Log;
|
|
|
|
import org.apache.commons.logging.LogFactory;
|
2020-01-28 16:53:59 +01:00
|
|
|
import org.apache.http.HttpHost;
|
|
|
|
import org.elasticsearch.action.bulk.BulkRequest;
|
|
|
|
import org.elasticsearch.action.bulk.BulkResponse;
|
|
|
|
import org.elasticsearch.action.index.IndexRequest;
|
|
|
|
import org.elasticsearch.client.RequestOptions;
|
|
|
|
import org.elasticsearch.client.RestClient;
|
|
|
|
import org.elasticsearch.client.RestHighLevelClient;
|
|
|
|
import org.elasticsearch.common.xcontent.XContentType;
|
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
2020-06-12 18:14:41 +02:00
|
|
|
import javax.annotation.PostConstruct;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.lang.reflect.InvocationTargetException;
|
2020-07-07 13:39:22 +02:00
|
|
|
import java.util.Arrays;
|
2020-06-12 18:14:41 +02:00
|
|
|
|
2020-01-28 16:53:59 +01:00
|
|
|
@Service
|
|
|
|
public class BulkUpload {
|
|
|
|
|
2020-06-12 18:14:41 +02:00
|
|
|
private static final Log log = LogFactory.getLog(BulkUpload.class);
|
|
|
|
|
|
|
|
@Value("${elasticsearch.hostname}")
|
|
|
|
private String elasticSearchHostName;
|
|
|
|
|
|
|
|
@Value("${elasticsearch.indexname}")
|
|
|
|
private String elasticSearchIndexName;
|
2020-01-28 16:53:59 +01:00
|
|
|
|
|
|
|
private RestHighLevelClient client;
|
|
|
|
|
2020-06-16 02:36:16 +02:00
|
|
|
public void init(String elasticSearchHostName, String elasticSearchIndexName) throws IOException {
|
|
|
|
this.elasticSearchIndexName = elasticSearchIndexName;
|
|
|
|
client = new RestHighLevelClient(
|
|
|
|
RestClient.builder(
|
|
|
|
new HttpHost(elasticSearchHostName,9200,"http")));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2020-01-28 16:53:59 +01:00
|
|
|
@PostConstruct
|
2020-06-10 19:39:53 +02:00
|
|
|
public void init() throws IOException {
|
2020-01-28 16:53:59 +01:00
|
|
|
client = new RestHighLevelClient(
|
|
|
|
RestClient.builder(
|
2020-06-12 18:14:41 +02:00
|
|
|
new HttpHost(elasticSearchHostName,9200,"http")));
|
2020-01-28 16:53:59 +01:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2020-07-22 23:13:47 +02:00
|
|
|
public int index(ResourceManager manager, boolean isCollection) {
|
2020-01-28 16:53:59 +01:00
|
|
|
BulkRequest request = new BulkRequest();
|
2020-07-22 23:13:47 +02:00
|
|
|
int esResponseCode = 0;
|
2020-01-28 16:53:59 +01:00
|
|
|
while (manager.hasNext()){
|
2020-06-10 19:39:53 +02:00
|
|
|
|
2020-01-28 16:53:59 +01:00
|
|
|
try {
|
2020-06-10 19:39:53 +02:00
|
|
|
Object next = manager.next();
|
|
|
|
AriadneCatalogEntry ace = ((AriadneCatalogEntry) next);
|
2020-07-07 13:39:22 +02:00
|
|
|
if (isCollection) {
|
|
|
|
ace.setResourceType("collection");
|
2020-07-13 11:03:21 +02:00
|
|
|
if (ace.getSpatial()==null) {
|
|
|
|
ace.setSpatial(Arrays.asList(new Spatial()));
|
|
|
|
}
|
2020-07-07 13:39:22 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
ace.setResourceType("dataset");
|
2020-07-08 22:00:28 +02:00
|
|
|
Spatial esSpatial = new Spatial();
|
|
|
|
ace.getSpatial().stream().filter(s -> s.getPlaceName()!=null).forEach(s -> {
|
|
|
|
esSpatial.setPlaceName(s.getPlaceName());
|
|
|
|
});
|
|
|
|
ace.getSpatial().stream().filter(s -> s.getLocation()!=null).forEach(s -> {
|
|
|
|
esSpatial.setLocation(s.getLocation());
|
|
|
|
});
|
|
|
|
ace.getSpatial().clear();
|
|
|
|
ace.setSpatial(Arrays.asList(esSpatial));
|
2020-07-07 13:39:22 +02:00
|
|
|
}
|
2020-07-08 22:00:28 +02:00
|
|
|
|
2020-07-13 11:03:21 +02:00
|
|
|
if (!isCollection) {
|
|
|
|
String uniqueIsPartOf = ace.getUniqueIsPartOf();
|
|
|
|
if (uniqueIsPartOf != null) {
|
|
|
|
ace.setIsPartOf(Arrays.asList(uniqueIsPartOf));
|
|
|
|
}
|
|
|
|
if (ace.getContributor() != null) {
|
|
|
|
ace.getContributor().clear();
|
|
|
|
ace.setContributor(ace.getCreator());
|
|
|
|
}
|
2020-07-17 17:42:08 +02:00
|
|
|
// Distribution distribution = new Distribution();
|
|
|
|
// AgentInfo distrPublisher = new AgentInfo();
|
|
|
|
// distrPublisher.setEmail("");
|
|
|
|
// distrPublisher.setName("");
|
|
|
|
// distrPublisher.setType("");
|
|
|
|
// distribution.setPublisher(Arrays.asList(distrPublisher));
|
|
|
|
// ace.setDistribution(Arrays.asList(distribution));
|
|
|
|
// ItemMetadataStructure ims = new ItemMetadataStructure();
|
|
|
|
// ace.setHasItemMetadataStructure(Arrays.asList(ims));
|
|
|
|
// MetadataRecord mr = new MetadataRecord();
|
|
|
|
// Dex dex = new Dex();
|
|
|
|
// mr.setConformsTo(Arrays.asList(dex));
|
|
|
|
// ace.setHasMetadataRecord(Arrays.asList(mr));
|
|
|
|
// if (!isCollection) {
|
|
|
|
// ace.setKeyword(Arrays.asList(new String("")));
|
|
|
|
// }
|
|
|
|
// AgentInfo sr = new AgentInfo();
|
|
|
|
// ace.setScientificResponsible(Arrays.asList(sr));
|
|
|
|
// AgentInfo tr = new AgentInfo();
|
|
|
|
// ace.setTechnicalResponsible(Arrays.asList(tr));
|
2020-07-07 13:39:22 +02:00
|
|
|
}
|
2020-07-24 12:59:59 +02:00
|
|
|
// AgentInfo testPublisher = new AgentInfo();
|
|
|
|
// testPublisher.setName("TEST");
|
|
|
|
// ace.getPublisher().add(testPublisher);
|
2020-06-10 19:39:53 +02:00
|
|
|
String[] splits = ace.getIdentifier().split("/");
|
2020-07-09 16:35:04 +02:00
|
|
|
|
|
|
|
if (ace.getAatSubjects() != null && ace.getDerivedSubject() != null) {
|
|
|
|
String aatSource = ace.getAatSubjects().get(0).getId();
|
|
|
|
ace.getDerivedSubject().forEach(d -> {
|
|
|
|
d.setSource(aatSource);
|
|
|
|
});
|
|
|
|
String [] aatSourceSplit = aatSource.split("/");
|
|
|
|
String aatSubjectId = aatSourceSplit[aatSourceSplit.length-1];
|
|
|
|
ace.getAatSubjects().forEach(s -> s.setId(aatSubjectId));
|
|
|
|
}
|
2020-07-13 11:03:21 +02:00
|
|
|
|
2020-07-17 17:42:08 +02:00
|
|
|
String idES = splits[splits.length-1];
|
2020-07-13 11:03:21 +02:00
|
|
|
request.add(new IndexRequest(elasticSearchIndexName).id(idES)
|
2020-01-28 16:53:59 +01:00
|
|
|
.source(ace.toJson(),XContentType.JSON));
|
2020-07-13 11:03:21 +02:00
|
|
|
|
2020-07-24 12:34:06 +02:00
|
|
|
// log.info("Indexing: "+idES+" :: "+ace.toJson());
|
2020-01-28 16:53:59 +01:00
|
|
|
BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
|
2020-07-22 23:13:47 +02:00
|
|
|
if (bulkResponse!=null) {
|
|
|
|
esResponseCode = bulkResponse.status().getStatus();
|
2020-07-24 12:34:06 +02:00
|
|
|
// log.info("Indexing to ES completed with status: " + bulkResponse.status());
|
2020-07-22 23:13:47 +02:00
|
|
|
if (bulkResponse.hasFailures()) {
|
|
|
|
log.error("FailureMessage: " + bulkResponse.buildFailureMessage());
|
|
|
|
}
|
2020-06-10 19:39:53 +02:00
|
|
|
}
|
2020-07-23 23:12:07 +02:00
|
|
|
else {
|
|
|
|
esResponseCode = -3;
|
|
|
|
}
|
2020-07-22 23:13:47 +02:00
|
|
|
} catch (Exception e) {
|
|
|
|
log.error("Indexing "+e.getMessage());
|
|
|
|
return -1;
|
2020-01-28 16:53:59 +01:00
|
|
|
}
|
|
|
|
}
|
2020-07-22 23:13:47 +02:00
|
|
|
return esResponseCode;
|
2020-01-28 16:53:59 +01:00
|
|
|
}
|
|
|
|
}
|