You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
AriadnePlus/dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/IndexOnESJobNode.java

221 lines
6.7 KiB
Java

package eu.dnetlib.ariadneplus.workflows.nodes;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
import eu.dnetlib.msro.workflows.nodes.is.ValidateProfilesJobNode;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.msro.workflows.procs.Token;
import eu.dnetlib.msro.workflows.util.ProgressProvider;
import eu.dnetlib.rmi.manager.MSROException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Workflow node that asks the publisher service to index the resources of one
 * datasource/collection, one identifier at a time.
 *
 * <p>For each resource type ({@code COLLECTION} first, then {@code RECORD}) the node
 * fetches the identifiers from {@code <publisherEndpoint>/selectIdentifiers} and then
 * POSTs each of them to {@code <publisherEndpoint>/indexOnESByIdentifier}.
 * A failure on a single identifier is logged and does not stop the run; a failure
 * while selecting identifiers aborts the node with an {@link MSROException}.
 */
public class IndexOnESJobNode extends AsyncJobNode {

    private static final Log log = LogFactory.getLog(IndexOnESJobNode.class);

    private static final String COLLECTION_RESOURCE_TYPE = "COLLECTION";
    private static final String RECORD_RESOURCE_TYPE = "RECORD";

    /** Separator of the segments of {@link #datasourceInterface}. */
    private static final String INTERFACE_SEPARATOR = "::";
    /** Zero-based position of the datasource name inside {@link #datasourceInterface}. */
    private static final int DATASOURCE_POSITION = 2;
    /** Zero-based position of the collection id inside {@link #datasourceInterface}. */
    private static final int COLLECTION_ID_POSITION = 3;

    private String publisherEndpoint;
    private String datasourceInterface;
    private String datasource;

    // Written only by the thread running execute(); read by the progress provider.
    // NOTE(review): volatile assumes the provider is polled from another thread - confirm.
    private volatile int currentResourceToIndex = 0;
    private volatile int totalResourceToIndex = 0;

    /**
     * Indexes every collection and then every record of the configured datasource interface.
     *
     * @param env the workflow environment (not used by this node)
     * @return {@link Arc#DEFAULT_ARC} when the run completes, even if single identifiers failed
     * @throws MSROException if the datasource interface is malformed or the identifiers
     *                       cannot be selected from the publisher
     */
    @Override
    protected String execute(final Env env) throws Exception {
        currentResourceToIndex = 0;
        totalResourceToIndex = 0;

        // One client for the whole run: it is closed (with its connections) when the block exits.
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            final String[] parts = splitDatasourceInterface();
            final String datasourceName = parts[DATASOURCE_POSITION];
            final String collectionId = parts[COLLECTION_ID_POSITION];

            int failures = indexAll(client, datasourceName, collectionId, COLLECTION_RESOURCE_TYPE);
            failures += indexAll(client, datasourceName, collectionId, RECORD_RESOURCE_TYPE);

            if (failures > 0) {
                log.warn(failures + " of " + totalResourceToIndex + " resources could not be indexed for "
                        + datasourceName + " / " + collectionId);
            }
        } catch (Throwable t) {
            // Every failure is reported to the workflow as an MSROException;
            // the full stack trace is kept in the log.
            log.error("Indexing on Elastic Search failed", t);
            throw new MSROException("Indexing on Elastic Search: " + t.getMessage());
        }
        return Arc.DEFAULT_ARC;
    }

    public String getPublisherEndpoint() {
        return publisherEndpoint;
    }

    public void setPublisherEndpoint(final String publisherEndpoint) {
        this.publisherEndpoint = publisherEndpoint;
    }

    public String getDatasourceInterface() {
        return datasourceInterface;
    }

    public void setDatasourceInterface(String datasourceInterface) {
        this.datasourceInterface = datasourceInterface;
    }

    public String getDatasource() {
        return datasource;
    }

    public void setDatasource(String datasource) {
        this.datasource = datasource;
    }

    /** Installs a progress provider reporting "indexed / total" for the current run. */
    @Override
    protected void beforeStart(final Token token) {
        token.setProgressProvider(new ProgressProvider() {
            @Override
            public String getProgressDescription() {
                return IndexOnESJobNode.this.currentResourceToIndex + " / " + IndexOnESJobNode.this.totalResourceToIndex;
            }
        });
    }

    private String getIndexOnESEndpoint() {
        return publisherEndpoint.concat("/indexOnESByIdentifier");
    }

    private String getSelectIdentifiersEndpoint() {
        return publisherEndpoint.concat("/selectIdentifiers");
    }

    /**
     * Splits {@link #datasourceInterface} on {@code "::"} and checks that the datasource
     * and collection id segments are present.
     *
     * @return the segments of the datasource interface
     * @throws IllegalArgumentException if the value is null or has fewer than four segments
     */
    private String[] splitDatasourceInterface() {
        final String value = getDatasourceInterface();
        final String[] parts = value == null ? new String[0] : value.split(INTERFACE_SEPARATOR);
        if (parts.length <= COLLECTION_ID_POSITION) {
            throw new IllegalArgumentException("Invalid datasourceInterface '" + value + "': expected at least "
                    + (COLLECTION_ID_POSITION + 1) + " segments separated by '" + INTERFACE_SEPARATOR + "'");
        }
        return parts;
    }

    /**
     * Selects the identifiers of the given resource type and indexes them one by one,
     * updating the progress counters.
     *
     * @return the number of identifiers whose indexing failed
     * @throws IOException        if the identifiers cannot be selected
     * @throws URISyntaxException if the publisher endpoint is not a valid URI
     */
    private int indexAll(final CloseableHttpClient client, final String datasourceName, final String collectionId,
            final String resourceType) throws IOException, URISyntaxException {
        final List<String> identifiers = selectIdentifiers(client, datasourceName, collectionId, resourceType);
        totalResourceToIndex = totalResourceToIndex + identifiers.size();

        int failures = 0;
        for (final String identifier : identifiers) {
            try {
                indexing(client, datasourceName, collectionId, resourceType, identifier);
            } catch (Exception e) {
                // Best effort: one bad identifier must not stop the remaining ones.
                failures++;
                log.error(identifier + " " + e, e);
            }
            currentResourceToIndex = currentResourceToIndex + 1;
        }
        return failures;
    }

    /**
     * Removes every {@code [}, {@code ]} and {@code "} character from the identifier
     * (anywhere in the string, not only at its ends).
     *
     * @return the cleaned identifier, or {@code null} if the input is {@code null}
     */
    private String cleanIdentifier(String identifier) {
        if (identifier == null) {
            return null;
        }
        return identifier
                .replace("[", "")
                .replace("]", "")
                .replace("\"", "");
    }

    /**
     * Turns the body returned by the select endpoint into a list of identifiers.
     * The body is split on commas and each token is cleaned and trimmed; empty tokens
     * are dropped, so an empty body or {@code []} yields an empty list.
     * NOTE(review): the body looks like a JSON array of strings handled as plain text -
     * an identifier containing a comma would be split; confirm with the publisher API.
     */
    private List<String> parseIdentifiers(final String content) {
        final List<String> identifiers = new ArrayList<>();
        if (content == null) {
            return identifiers;
        }
        for (final String token : content.split(",")) {
            final String identifier = cleanIdentifier(token).trim();
            if (!identifier.isEmpty()) {
                identifiers.add(identifier);
            }
        }
        return identifiers;
    }

    /**
     * Reads the response body as text and fails if the status code is not 2xx.
     *
     * @return the body, or an empty string when the response carries no entity
     * @throws IOException if the body cannot be read or the status code is not in 200-299
     */
    private static String readBody(final HttpResponse res, final URI uri) throws IOException {
        final int statusCode = res.getStatusLine() != null ? res.getStatusLine().getStatusCode() : -1;
        final HttpEntity entity = res.getEntity();
        // UTF-8 is only the fallback: a charset declared by the response still wins.
        final String body = entity != null ? EntityUtils.toString(entity, StandardCharsets.UTF_8) : "";
        if (statusCode < 200 || statusCode >= 300) {
            throw new IOException("Publisher endpoint " + uri + " answered with status code " + statusCode);
        }
        return body;
    }

    /**
     * POSTs one identifier to the index endpoint.
     *
     * @return the body of the publisher response
     * @throws IOException        on transport errors or a non-2xx status code
     * @throws URISyntaxException if the publisher endpoint is not a valid URI
     */
    private String indexing(final CloseableHttpClient client, final String datasourceName, final String collectionId,
            final String resourceType, final String identifier) throws IOException, URISyntaxException {
        final URI postURI = new URIBuilder(getIndexOnESEndpoint())
                .addParameter("datasource", datasourceName)
                .addParameter("collectionId", collectionId)
                .addParameter("resourceType", resourceType)
                .addParameter("identifier", identifier)
                .build();
        try (CloseableHttpResponse res = client.execute(new HttpPost(postURI))) {
            return readBody(res, postURI);
        }
    }

    /**
     * GETs the identifiers of the given resource type from the select endpoint.
     *
     * @return the cleaned, non-empty identifiers; never {@code null}
     * @throws IOException        on transport errors or a non-2xx status code
     * @throws URISyntaxException if the publisher endpoint is not a valid URI
     */
    private List<String> selectIdentifiers(final CloseableHttpClient client, final String datasourceName,
            final String collectionId, final String resourceType) throws IOException, URISyntaxException {
        final URI getURI = new URIBuilder(getSelectIdentifiersEndpoint())
                .addParameter("datasource", datasourceName)
                .addParameter("collectionId", collectionId)
                .addParameter("resourceType", resourceType)
                .build();
        try (CloseableHttpResponse res = client.execute(new HttpGet(getURI))) {
            return parseIdentifiers(readBody(res, getURI));
        }
    }
}