A multithreaded HTTP client is not needed in IndexOnESJobNode (removing it may avoid the 404 responses seen after a few minutes); added a log entry for each indexing operation

pull/2/head
Enrico Ottonello 4 years ago
parent 56088d7779
commit db2ad3f97e

@ -67,8 +67,11 @@ public class RunSPARQLQueryService {
log.info("Start indexing "+ recordIds.size()+ " records ...");
final List<Integer> errorCodesCount = Arrays.asList(new Integer(0));
final List<Integer> successCodesCount = Arrays.asList(new Integer(0));
final List<Integer> counter = Arrays.asList(new Integer(0));
recordIds.forEach(recordId -> {
log.info(recordId+" >");
int operationResult = executeQueryGraph(selectQueryTemplate, recordId, isCollection);
log.info(" "+operationResult);
if (operationResult!=200) {
log.error(recordId + " error_code: "+ Integer.toString(operationResult));
int currentErrorsCount = errorCodesCount.get(0).intValue();
@ -80,7 +83,18 @@ public class RunSPARQLQueryService {
currentSuccessCount+=1;
successCodesCount.set(0, new Integer(currentSuccessCount));
}
int counterValue = counter.get(0).intValue();
String curReport = null;
if ((counterValue % 1000) == 0) {
curReport = "Current analyzed records: "+counterValue+" Current indexed records: "+ successCodesCount.get(0).intValue() +
" , " + "Current errors: "+ errorCodesCount.get(0).intValue();
log.info(curReport);
}
counterValue+=1;
counter.set(0, new Integer(counterValue));
});
String report = "Total indexed records: "+ successCodesCount.get(0).intValue() +
" , " + "Total errors: "+ errorCodesCount.get(0).intValue();
log.info(report);

@ -0,0 +1,124 @@
# Update 1 - ARIADNE subject labels for one collection.
# Selects the AO_Collection whose has_original_id is "1000398", together with
# each of its ARIADNE subjects and that subject's skos:prefLabel, and writes the
# subject link, the prefLabel and an rdfs:label carrying the same value into the
# ads::398 named graph.
# NOTE(review): the WHERE clause has no GRAPH or USING clause, so it is matched
# against the endpoint's default dataset - confirm which graphs that covers.
PREFIX aocat: <https://www.ariadne-infrastructure.eu/resource/ao/cat/1.1/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
INSERT {
  GRAPH <https://ariadne-infrastructure.eu/api_________::ariadne_plus::ads::398> {
    ?coll aocat:has_ARIADNE_subject ?subjectType .
    ?subjectType skos:prefLabel ?subjectLabel .
    ?subjectType rdfs:label ?subjectLabel .
  }
}
WHERE {
  ?coll a aocat:AO_Collection .
  ?coll aocat:has_original_id "1000398" .
  ?coll aocat:has_ARIADNE_subject ?subjectType .
  ?subjectType skos:prefLabel ?subjectLabel .
};
# Update 2 - copy collection-level metadata down to the records.
# For every record that is_part_of a collection, the collection's issued and
# modified values, contributor, responsible, owner, publisher, access rights and
# ARIADNE subject are asserted on the record itself. Both the pattern matching
# (USING) and the insertion (GRAPH) target the ads::398 named graph.
# All eight collection patterns are joined, so a collection lacking any one of
# these properties contributes no triples at all.
PREFIX aocat: <https://www.ariadne-infrastructure.eu/resource/ao/cat/1.1/>
INSERT {
  GRAPH <https://ariadne-infrastructure.eu/api_________::ariadne_plus::ads::398> {
    ?rec aocat:was_issued ?issuedOn .
    ?rec aocat:was_modified ?modifiedOn .
    ?rec aocat:has_contributor ?contrib .
    ?rec aocat:has_responsible ?responsible .
    ?rec aocat:has_owner ?ownedBy .
    ?rec aocat:has_publisher ?publishedBy .
    ?rec aocat:has_access_rights ?rights .
    ?rec aocat:has_ARIADNE_subject ?subjectType .
  }
}
USING <https://ariadne-infrastructure.eu/api_________::ariadne_plus::ads::398>
WHERE {
  ?rec aocat:is_part_of ?coll .
  ?coll aocat:was_issued ?issuedOn .
  ?coll aocat:was_modified ?modifiedOn .
  ?coll aocat:has_contributor ?contrib .
  ?coll aocat:has_responsible ?responsible .
  ?coll aocat:has_owner ?ownedBy .
  ?coll aocat:has_publisher ?publishedBy .
  ?coll aocat:has_access_rights ?rights .
  ?coll aocat:has_ARIADNE_subject ?subjectType .
};
# Update 3 - default native subject.
# Every AO_Individual_Data_Resource in the ads::398 graph that has no
# has_native_subject triple in that graph is linked to the ADS NOT_PROVIDED
# concept, and that concept is given the prefLabel "Not provided".
PREFIX aocat: <https://www.ariadne-infrastructure.eu/resource/ao/cat/1.1/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
INSERT {
  GRAPH <https://ariadne-infrastructure.eu/api_________::ariadne_plus::ads::398> {
    ?resource aocat:has_native_subject <https://ariadne-infrastructure.eu/aocat/Concept/ADS/NOT_PROVIDED> .
    <https://ariadne-infrastructure.eu/aocat/Concept/ADS/NOT_PROVIDED> skos:prefLabel "Not provided" .
  }
}
WHERE {
  GRAPH <https://ariadne-infrastructure.eu/api_________::ariadne_plus::ads::398> {
    ?resource rdf:type aocat:AO_Individual_Data_Resource .
    # Keep only resources without any native subject in this graph.
    FILTER NOT EXISTS {
      ?resource aocat:has_native_subject ?existingSubject .
    }
  }
};
# Update 4 - derived (AAT) subjects.
# A record's native subject is followed through any of the four SKOS mapping
# properties exactMatch, broadMatch, closeMatch or narrowMatch, and the mapped
# concept is asserted as has_derived_subject of the record in the ads::aatplus
# graph. The two USING clauses make the ads::aat and ads::398 graphs together
# the default graph for the WHERE clause.
PREFIX aocat: <https://www.ariadne-infrastructure.eu/resource/ao/cat/1.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
INSERT {
  GRAPH <https://ariadne-infrastructure.eu/ariadneplus::ads::aatplus> {
    ?rec aocat:has_derived_subject ?aatConcept .
  }
}
USING <https://ariadne-infrastructure.eu/api_________::ariadne_plus::ads::aat>
USING <https://ariadne-infrastructure.eu/api_________::ariadne_plus::ads::398>
WHERE {
  ?rec aocat:has_native_subject ?nativeSubject .
  # One alternative path replaces the four UNION branches of identical shape.
  ?nativeSubject (skos:exactMatch|skos:broadMatch|skos:closeMatch|skos:narrowMatch) ?aatConcept .
};
# Update 5 - PeriodO mapping of native periods.
# The prefLabel of each native period in the ads::398 graph is compared with
# the altLabel of periods in the ads::periodo graph that belong to the scheme
# http://n2t.net/ark:/99152/p0kh9ds. On a match, the period and the prefLabels
# of its start and end intervals are written to the ads::periodoplus graph as
# has_period, from and until.
# Every inserted triple uses a variable that is bound only inside the OPTIONAL
# part, so rows for which the OPTIONAL does not match add nothing.
PREFIX aocat: <https://www.ariadne-infrastructure.eu/resource/ao/cat/1.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX time: <http://www.w3.org/2006/time#>
INSERT {
  GRAPH <https://ariadne-infrastructure.eu/ariadneplus::ads::periodoplus> {
    ?temporalRegion aocat:has_period ?period .
    ?temporalRegion aocat:from ?fromLabel .
    ?temporalRegion aocat:until ?untilLabel .
  }
}
WHERE {
  GRAPH <https://ariadne-infrastructure.eu/api_________::ariadne_plus::ads::398> {
    ?temporalRegion aocat:has_native_period ?nativePeriod .
    ?nativePeriod skos:prefLabel ?nativeLabel .
    OPTIONAL {
      GRAPH <https://ariadne-infrastructure.eu/ariadneplus::ads::periodo> {
        ?period skos:altLabel ?nativeLabel .
        ?period skos:inScheme <http://n2t.net/ark:/99152/p0kh9ds> .
        ?period time:intervalStartedBy ?startInterval .
        ?startInterval skos:prefLabel ?fromLabel .
        ?period time:intervalFinishedBy ?endInterval .
        ?endInterval skos:prefLabel ?untilLabel .
      }
    }
  }
};

@ -21,7 +21,7 @@ import java.util.Properties;
* @author enrico.ottonello
*
*/
@Ignore
//@Ignore
public class GraphDbReaderAndESIndexTest {
private RunSPARQLQueryService runSPQRLQuery;
@ -58,7 +58,7 @@ public class GraphDbReaderAndESIndexTest {
final ClassPathResource queryTemplateResource;
boolean testRecord = true;
if (testRecord) {
recordId = "https://ariadne-infrastructure.eu/aocat/Resource/FE3155A7-AF9F-3C5F-A92E-93041EF495E0";
recordId = "https://ariadne-infrastructure.eu/aocat/Resource/D200902C-A1C2-346E-8F37-E8A429260ADE";
queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql");
}
else {
@ -67,7 +67,7 @@ public class GraphDbReaderAndESIndexTest {
}
String datasource = "ads";
String collectionId = "270";
String collectionId = "398";
List<String> recordIds = Arrays.asList(recordId);
String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name());
if (testRecord) {

@ -16,6 +16,7 @@ import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
@ -46,7 +47,7 @@ public class IndexOnESJobNode extends AsyncJobNode {
private String datasource;
//for parallel requests to the publisher endpoint
private int nThreads = 5;
// private int nThreads = 5;
@Override
protected String execute(final Env env) throws Exception {
@ -54,10 +55,11 @@ public class IndexOnESJobNode extends AsyncJobNode {
int statusCode = -1;
String indexOnESResult = "noResult";
log.info("Publisher endpoint: " + getPublisherEndpoint());
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
cm.setMaxTotal(nThreads);
CloseableHttpClient client = HttpClients.custom().setConnectionManager(cm).build();
// PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
// cm.setMaxTotal(nThreads);
// CloseableHttpClient client = HttpClients.custom().setConnectionManager(cm).build();
CloseableHttpClient client = HttpClients.createDefault();
log.info("IndexOnES endpoint: " + getIndexOnESEndpoint());
CloseableHttpResponse responsePOST = null;
try {
@ -95,7 +97,7 @@ public class IndexOnESJobNode extends AsyncJobNode {
finally{
if(responsePOST != null) responsePOST.close();
client.close();
cm.shutdown();
// cm.shutdown();
}
env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "statusCode", Integer.toString(statusCode));

Loading…
Cancel
Save