2019-06-18 16:15:23 +02:00
|
|
|
package eu.dnetlib.ariadneplus.workflows.nodes;
|
|
|
|
|
|
|
|
import java.io.IOException;
import java.net.ConnectException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.springframework.beans.factory.annotation.Autowired;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.gson.Gson;

import eu.dnetlib.enabling.resultset.client.ResultSetClient;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.msro.workflows.procs.Token;
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider;
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
import eu.dnetlib.rmi.common.ResultSet;
import eu.dnetlib.rmi.manager.MSROException;
|
|
|
|
|
2019-06-18 16:15:23 +02:00
|
|
|
|
2019-12-12 12:58:30 +01:00
|
|
|
public class PublishGraphDBJobNode extends AsyncJobNode {
|
2019-06-18 16:15:23 +02:00
|
|
|
|
2019-12-12 12:58:30 +01:00
|
|
|
private static final Log log = LogFactory.getLog(PublishGraphDBJobNode.class);
|
|
|
|
|
|
|
|
private String eprParam;
|
2019-06-18 16:15:23 +02:00
|
|
|
|
|
|
|
@Autowired
|
|
|
|
private ResultSetClient resultSetClient;
|
|
|
|
|
|
|
|
private String publisherEndpoint;
|
2019-12-13 14:55:51 +01:00
|
|
|
private String datasourceInterface;
|
2020-01-24 10:52:04 +01:00
|
|
|
private String datasource;
|
2019-06-18 16:15:23 +02:00
|
|
|
|
|
|
|
//for parallel requests to the publisher endpoint
|
|
|
|
private int nThreads = 5;
|
2019-12-16 18:52:08 +01:00
|
|
|
private int nTasks = 150;
|
2019-06-18 16:15:23 +02:00
|
|
|
private ExecutorService executorService = Executors.newFixedThreadPool(nThreads);
|
|
|
|
private List<Future<Integer>> resList = Lists.newArrayList();
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected String execute(final Env env) throws Exception {
|
|
|
|
|
2019-12-12 12:58:30 +01:00
|
|
|
final ResultSet<?> rsIn = env.getAttribute(getEprParam(), ResultSet.class);
|
|
|
|
if ((rsIn == null)) { throw new MSROException("EprParam (" + getEprParam() + ") not found in ENV"); }
|
2019-06-18 16:15:23 +02:00
|
|
|
|
|
|
|
int countAll = 0;
|
|
|
|
int countOk = 0;
|
2019-12-16 18:52:08 +01:00
|
|
|
int partial = 0;
|
2019-06-18 16:15:23 +02:00
|
|
|
Map<Integer, Integer> errors = Maps.newHashMap();
|
2019-12-16 18:52:08 +01:00
|
|
|
log.info("Publisher endpoint: " + getPublisherEndpoint());
|
|
|
|
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
|
|
|
|
cm.setMaxTotal(nThreads);
|
|
|
|
CloseableHttpClient client = HttpClients.custom().setConnectionManager(cm).build();
|
2020-01-14 16:55:45 +01:00
|
|
|
|
|
|
|
log.info("DropDatasourceApisPartitionInfo endpoint: " + getDropDatasourceApisPartitionInfoEndpoint());
|
|
|
|
CloseableHttpResponse responseDDAPIPOST = null;
|
|
|
|
try {
|
|
|
|
HttpPost post = new HttpPost(getDropDatasourceApisPartitionInfoEndpoint());
|
|
|
|
List<NameValuePair> params = Lists.newArrayList();
|
|
|
|
String datasourceInterfaceValue = getDatasourceInterface();
|
|
|
|
log.info("drop datasourceApis partition info for datasourceInterface " + datasourceInterfaceValue);
|
|
|
|
params.add(new BasicNameValuePair("datasourceApi", datasourceInterfaceValue));
|
|
|
|
UrlEncodedFormEntity ent = new UrlEncodedFormEntity(params, "UTF-8");
|
|
|
|
post.setEntity(ent);
|
|
|
|
responseDDAPIPOST = client.execute(post);
|
|
|
|
int statusCode = responseDDAPIPOST.getStatusLine().getStatusCode();
|
|
|
|
switch (statusCode) {
|
|
|
|
case 200:
|
|
|
|
log.info("drop datasourceApis partition info completed");
|
|
|
|
break;
|
|
|
|
default:
|
2020-06-05 16:19:54 +02:00
|
|
|
log.error("error dropping datasourceApis partition info " + statusCode + ": " + responseDDAPIPOST.getStatusLine().getReasonPhrase());
|
2020-01-14 16:55:45 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
} catch (ConnectException ce) {
|
|
|
|
throw new MSROException("unable to connect to Publisher endpoint" + getPublishEndpoint());
|
|
|
|
}
|
|
|
|
catch (IOException e) {
|
|
|
|
log.error("error feeding provenance ", e);
|
|
|
|
}
|
|
|
|
finally{
|
|
|
|
if(responseDDAPIPOST != null) responseDDAPIPOST.close();
|
|
|
|
}
|
|
|
|
|
2019-12-16 18:52:08 +01:00
|
|
|
//need to slow down the producer to avoid OOM errors due to many tasks in the queue of the executor
|
|
|
|
//see for example here: https://stackoverflow.com/questions/42108351/executorservice-giving-out-of-memory-error
|
|
|
|
//let's stop and wait after submission of nLatch tasks
|
2020-06-05 16:19:54 +02:00
|
|
|
boolean forceExit = false;
|
2019-06-18 16:15:23 +02:00
|
|
|
for (String record : getResultSetClient().iter(rsIn, String.class)) {
|
2020-06-05 16:19:54 +02:00
|
|
|
if(forceExit) break;
|
2019-06-18 16:15:23 +02:00
|
|
|
countAll++;
|
2019-12-16 18:52:08 +01:00
|
|
|
if(partial == nTasks) {
|
|
|
|
log.debug("Waiting for tasks to complete before resubmitting to executor (countAll = "+countAll+") . . . ");
|
|
|
|
log.debug("Getting replies");
|
|
|
|
long startWait = System.currentTimeMillis();
|
|
|
|
for(Future<Integer> res : resList){
|
|
|
|
if(res.get() == 200) countOk++;
|
2020-06-05 16:19:54 +02:00
|
|
|
if(res.get() == 400 || res.get() == 401 || res.get() == 402 | res.get() == 403 || res.get() == 404){
|
|
|
|
executorService.shutdownNow();
|
|
|
|
throw new MSROException("Client error "+ res.get());
|
|
|
|
}
|
2019-12-16 18:52:08 +01:00
|
|
|
}
|
|
|
|
resList.clear();
|
|
|
|
partial = 0;
|
|
|
|
log.debug(". . . Ready to submit again after "+(System.currentTimeMillis() - startWait)+" ms" );
|
|
|
|
}
|
|
|
|
partial++;
|
2019-06-18 16:15:23 +02:00
|
|
|
Future<Integer> res = executorService.submit( () -> {
|
2020-01-14 16:55:45 +01:00
|
|
|
CloseableHttpResponse responsePPOST = null;
|
2019-06-18 16:15:23 +02:00
|
|
|
try {
|
2019-12-13 14:55:51 +01:00
|
|
|
HttpPost post = new HttpPost(getPublishEndpoint());
|
2019-06-18 16:15:23 +02:00
|
|
|
List<NameValuePair> params = Lists.newArrayList();
|
|
|
|
params.add(new BasicNameValuePair("record", record));
|
|
|
|
UrlEncodedFormEntity ent = new UrlEncodedFormEntity(params, "UTF-8");
|
|
|
|
post.setEntity(ent);
|
2020-01-14 16:55:45 +01:00
|
|
|
responsePPOST = client.execute(post);
|
|
|
|
int statusCode = responsePPOST.getStatusLine().getStatusCode();
|
2019-06-18 16:15:23 +02:00
|
|
|
switch (statusCode) {
|
|
|
|
case 200:
|
|
|
|
return statusCode;
|
|
|
|
default:
|
2020-01-14 16:55:45 +01:00
|
|
|
log.error(responsePPOST.getStatusLine().getStatusCode() + ": " + responsePPOST.getStatusLine().getReasonPhrase());
|
2019-06-18 16:15:23 +02:00
|
|
|
log.error("Source record causing error: " + record);
|
|
|
|
errors.merge(statusCode, 1, Integer::sum);
|
|
|
|
return statusCode;
|
|
|
|
}
|
2019-12-12 12:58:30 +01:00
|
|
|
} catch (ConnectException ce) {
|
2019-12-13 14:55:51 +01:00
|
|
|
throw new MSROException("unable to connect to Publisher endpoint" + getPublishEndpoint());
|
2019-12-12 12:58:30 +01:00
|
|
|
}
|
|
|
|
catch (IOException e) {
|
2019-06-18 16:15:23 +02:00
|
|
|
e.printStackTrace();
|
|
|
|
errors.merge(-1, 1, Integer::sum);
|
|
|
|
}
|
2019-12-16 18:52:08 +01:00
|
|
|
finally{
|
2020-01-14 16:55:45 +01:00
|
|
|
if(responsePPOST != null) responsePPOST.close();
|
2019-12-16 18:52:08 +01:00
|
|
|
}
|
2019-06-18 16:15:23 +02:00
|
|
|
return -1;
|
|
|
|
});
|
|
|
|
resList.add(res);
|
|
|
|
}
|
|
|
|
executorService.shutdown();
|
|
|
|
|
|
|
|
//now let's wait for the results. We can block ourselves here: we have nothing else to do
|
|
|
|
log.info("Waiting for responses");
|
|
|
|
for(Future<Integer> res : resList){
|
|
|
|
if(res.get() == 200) countOk++;
|
|
|
|
}
|
2019-12-13 14:55:51 +01:00
|
|
|
log.info(String.format("Got all responses. Ok responses: %s/%s", countOk, countAll));
|
2019-06-18 16:15:23 +02:00
|
|
|
|
|
|
|
env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "countOk", countOk);
|
|
|
|
env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "countAll", countAll);
|
|
|
|
env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "errorsMap", new Gson().toJson(errors));
|
|
|
|
|
|
|
|
log.info("publishing completed");
|
|
|
|
if (!errors.isEmpty()) {
|
2019-12-12 12:58:30 +01:00
|
|
|
log.warn("Problems in publishing: "+countOk+"/"+countAll+" see error maps for details");
|
2019-06-18 16:15:23 +02:00
|
|
|
}
|
2019-12-16 18:52:08 +01:00
|
|
|
if(countAll == 0) {
|
|
|
|
log.warn("0 resources to publish");
|
|
|
|
}
|
2019-12-13 14:55:51 +01:00
|
|
|
|
|
|
|
if (countOk > 0) {
|
|
|
|
log.info("Feed provenance endpoint: " + getProvenanceFeedEndpoint());
|
2020-01-14 16:55:45 +01:00
|
|
|
CloseableHttpResponse responsePFPOST = null;
|
2019-12-13 14:55:51 +01:00
|
|
|
try {
|
|
|
|
HttpPost post = new HttpPost(getProvenanceFeedEndpoint());
|
|
|
|
List<NameValuePair> params = Lists.newArrayList();
|
|
|
|
String datasourceInterfaceValue = getDatasourceInterface();
|
|
|
|
log.info("feeding provenance for datasourceInterface " + datasourceInterfaceValue);
|
|
|
|
params.add(new BasicNameValuePair("datasourceApi", datasourceInterfaceValue));
|
2020-01-24 10:52:04 +01:00
|
|
|
String datasourceValue = getDatasource();
|
|
|
|
log.info("feeding provenance for datasource " + datasourceValue);
|
|
|
|
params.add(new BasicNameValuePair("datasource", datasourceValue));
|
2019-12-13 14:55:51 +01:00
|
|
|
UrlEncodedFormEntity ent = new UrlEncodedFormEntity(params, "UTF-8");
|
|
|
|
post.setEntity(ent);
|
2020-01-14 16:55:45 +01:00
|
|
|
responsePFPOST = client.execute(post);
|
|
|
|
int statusCode = responsePFPOST.getStatusLine().getStatusCode();
|
2019-12-13 14:55:51 +01:00
|
|
|
switch (statusCode) {
|
|
|
|
case 200:
|
|
|
|
log.info("feed provenance completed");
|
|
|
|
break;
|
|
|
|
default:
|
2020-01-14 16:55:45 +01:00
|
|
|
log.error("error feeding provenance " + responsePFPOST.getStatusLine().getStatusCode() + ": " + responsePFPOST.getStatusLine().getReasonPhrase());
|
2019-12-13 14:55:51 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
} catch (ConnectException ce) {
|
|
|
|
throw new MSROException("unable to connect to Publisher endpoint" + getPublishEndpoint());
|
|
|
|
}
|
|
|
|
catch (IOException e) {
|
|
|
|
log.error("error feeding provenance ", e);
|
|
|
|
}
|
2019-12-16 18:52:08 +01:00
|
|
|
finally{
|
2020-01-14 16:55:45 +01:00
|
|
|
if(responsePFPOST != null) responsePFPOST.close();
|
2019-12-17 15:35:36 +01:00
|
|
|
client.close();
|
2019-12-16 18:52:08 +01:00
|
|
|
cm.shutdown();
|
|
|
|
}
|
2019-12-13 14:55:51 +01:00
|
|
|
}
|
2019-12-16 18:52:08 +01:00
|
|
|
|
2019-06-18 16:15:23 +02:00
|
|
|
return Arc.DEFAULT_ARC;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getPublisherEndpoint() {
|
|
|
|
return publisherEndpoint;
|
|
|
|
}
|
2019-12-13 14:55:51 +01:00
|
|
|
|
|
|
|
private String getPublishEndpoint() {
|
|
|
|
return publisherEndpoint.concat("/publish");
|
|
|
|
}
|
|
|
|
|
|
|
|
private String getProvenanceFeedEndpoint() {
|
|
|
|
return publisherEndpoint.concat("/feedProvenance");
|
|
|
|
}
|
2019-06-18 16:15:23 +02:00
|
|
|
|
2020-01-14 16:55:45 +01:00
|
|
|
private String getDropDatasourceApisPartitionInfoEndpoint() {
|
2020-02-19 14:33:54 +01:00
|
|
|
return publisherEndpoint.concat("/dropDatasourceApiGraph");
|
2020-01-14 16:55:45 +01:00
|
|
|
}
|
|
|
|
|
2019-06-18 16:15:23 +02:00
|
|
|
public void setPublisherEndpoint(final String publisherEndpoint) {
|
|
|
|
this.publisherEndpoint = publisherEndpoint;
|
|
|
|
}
|
|
|
|
|
|
|
|
public ResultSetClient getResultSetClient() {
|
|
|
|
return resultSetClient;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setResultSetClient(final ResultSetClient resultSetClient) {
|
|
|
|
this.resultSetClient = resultSetClient;
|
|
|
|
}
|
|
|
|
|
2019-12-12 12:58:30 +01:00
|
|
|
public String getEprParam() {
|
|
|
|
return eprParam;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setEprParam(String eprParam) {
|
|
|
|
this.eprParam = eprParam;
|
|
|
|
}
|
2019-12-13 14:55:51 +01:00
|
|
|
|
|
|
|
public String getDatasourceInterface() {
|
|
|
|
return datasourceInterface;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public void setDatasourceInterface(String datasourceInterface) {
|
|
|
|
this.datasourceInterface = datasourceInterface;
|
|
|
|
}
|
2019-12-12 12:58:30 +01:00
|
|
|
|
2019-12-18 15:55:53 +01:00
|
|
|
@Override
|
|
|
|
protected void beforeStart(Token token) {
|
|
|
|
token.setProgressProvider(new ResultsetProgressProvider(token.getEnv().getAttribute(getEprParam(), ResultSet.class), this.resultSetClient));
|
|
|
|
}
|
|
|
|
|
2020-01-24 10:52:04 +01:00
|
|
|
|
|
|
|
public String getDatasource() {
|
|
|
|
return datasource;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public void setDatasource(String datasource) {
|
|
|
|
this.datasource = datasource;
|
|
|
|
}
|
|
|
|
|
2019-06-18 16:15:23 +02:00
|
|
|
}
|