Support to Set

This commit is contained in:
Fabio Sinibaldi 2020-12-16 11:29:12 +01:00
parent 15ee39bd9d
commit 2bc93e5312
3 changed files with 52 additions and 48 deletions

View File

@ -5,3 +5,4 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
## [1.0.4-SNAPSHOT] - 2020-12-15 ## [1.0.4-SNAPSHOT] - 2020-12-15
- Dependency management - Dependency management
- Naming Convention - Naming Convention
- Support for Set Filtering [#20342]

View File

@ -3,6 +3,7 @@ package org.gcube.data.publishing.gFeed.collectors.oai;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List;
import javax.ws.rs.client.Client; import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder; import javax.ws.rs.client.ClientBuilder;
@ -22,6 +23,7 @@ import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH.Token; import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH.Token;
import org.glassfish.jersey.client.ClientProperties; import org.glassfish.jersey.client.ClientProperties;
import lombok.Getter;
import lombok.NonNull; import lombok.NonNull;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.Setter; import lombok.Setter;
@ -68,11 +70,34 @@ public class OAIClient {
return client; return client;
} }
@Getter
private List<String> specifiedSets=new ArrayList<String>();
public Collection<OAIRecord> getAll(String metadataPrefix) throws JAXBException, OAIInteractionException{ public Collection<OAIRecord> getAll(String metadataPrefix) throws JAXBException, OAIInteractionException{
ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>(); ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>();
WebTarget target=getWebClient().target(baseUrl).queryParam("verb","ListRecords");
if(!specifiedSets.isEmpty())
for(String set : specifiedSets) {
log.info("Loading "+metadataPrefix+" SET : "+set+" from "+baseUrl);
target.queryParam("set", set);
toReturn.addAll(call(target,metadataPrefix));
}
else {
log.info("Loading "+metadataPrefix+" from "+baseUrl);
toReturn.addAll(call(target,metadataPrefix));
}
log.info("Obtained "+toReturn.size()+" from "+baseUrl);
return toReturn;
}
private List<OAIRecord> call(WebTarget target,String metadataPrefix){
ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>();
String resumptionToken=null; String resumptionToken=null;
// call & iterate // call & iterate
@ -80,8 +105,6 @@ public class OAIClient {
int currentAttempt=1; int currentAttempt=1;
while(!isComplete) { while(!isComplete) {
try { try {
WebTarget target=getWebClient().target(baseUrl).
queryParam("verb","ListRecords");
if(resumptionToken==null) if(resumptionToken==null)
target=target.queryParam("metadataPrefix",metadataPrefix); target=target.queryParam("metadataPrefix",metadataPrefix);
@ -89,16 +112,20 @@ public class OAIClient {
target=target.queryParam("resumptionToken", resumptionToken); target=target.queryParam("resumptionToken", resumptionToken);
Response resp=target.request("application/xml").get(); Response resp=target.request("application/xml").get();
OAI_PMH msg=check(resp);
if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage());
//No errors, thus reset attempt counter
currentAttempt=1;
toReturn.addAll(msg.getResponseRecords().getRecords());
log.debug("Parsed "+toReturn.size()+" records so far."); OAI_PMH msg=check(resp);
if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage());
//No errors, thus reset attempt counter
currentAttempt=1;
toReturn.addAll(msg.getResponseRecords().getRecords());
log.debug("Parsed "+toReturn.size()+" records so far.");
Token t=msg.getResponseRecords().getResumptionToken(); Token t=msg.getResponseRecords().getResumptionToken();
log.debug("Obtained token : "+t); log.debug("Obtained token : "+t);
@ -107,35 +134,12 @@ public class OAIClient {
resumptionToken=t.getId(); resumptionToken=t.getId();
}else isComplete=true; //no token = completion }else isComplete=true; //no token = completion
//Using limit //Using limit
if(maxItems>0 && toReturn.size()>=maxItems) { if(maxItems>0 && toReturn.size()>=maxItems) {
log.warn("MAX ITEMS LIMIT REACHED : "+toReturn.size()+" / "+maxItems); log.warn("MAX ITEMS LIMIT REACHED : "+toReturn.size()+" / "+maxItems);
isComplete=true; isComplete=true;
} }
// }catch(CommunicationException e) {
// log.warn("Received communication error "+e.getMessage());
// log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
// isComplete=currentAttempt>MAX_ATTEMPTS;
// try {
// Thread.sleep(currentAttempt*DELAY_FACTOR);
// } catch (InterruptedException e1) {}
// currentAttempt++;
//
// }catch(OAIInteractionException e) {
// log.warn("Remote OAI "+baseUrl+" didn't accept request ",e);
// log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
// isComplete=currentAttempt>MAX_ATTEMPTS;
// try {
// Thread.sleep(currentAttempt*DELAY_FACTOR);
// } catch (InterruptedException e1) {}
// currentAttempt++;
}catch(Throwable t) { }catch(Throwable t) {
// throw new OAIInteractionException("Unexpected error while harvesting "+baseUrl,t);
log.warn("Unexpected ERROR "+t.getMessage()); log.warn("Unexpected ERROR "+t.getMessage());
log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = "); log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
isComplete=currentAttempt>MAX_ATTEMPTS; isComplete=currentAttempt>MAX_ATTEMPTS;
@ -145,15 +149,9 @@ public class OAIClient {
currentAttempt++; currentAttempt++;
} }
} }
log.trace("Obtained "+toReturn.size()+" from "+baseUrl);
return toReturn; return toReturn;
} }
private void retry() {
}
private static OAI_PMH check(Response resp) throws JAXBException, CommunicationException { private static OAI_PMH check(Response resp) throws JAXBException, CommunicationException {
if(resp.getStatus()<200||resp.getStatus()>=300) { if(resp.getStatus()<200||resp.getStatus()>=300) {
// exception // exception

View File

@ -4,6 +4,8 @@ import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.gcube.common.resources.gcore.ServiceEndpoint; import org.gcube.common.resources.gcore.ServiceEndpoint;
import org.gcube.common.resources.gcore.ServiceEndpoint.AccessPoint;
import org.gcube.common.resources.gcore.ServiceEndpoint.Property;
import org.gcube.data.publishing.gCatFeeder.utils.ISUtils; import org.gcube.data.publishing.gCatFeeder.utils.ISUtils;
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector; import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault; import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault;
@ -22,11 +24,14 @@ public class OAICollector implements DataCollector<OAIRecord> {
String oaiPlatform="oai-pmh"; String oaiPlatform="oai-pmh";
for(ServiceEndpoint epr:ISUtils.queryForServiceEndpoints(oaiCategory, oaiPlatform)) { for(ServiceEndpoint epr:ISUtils.queryForServiceEndpoints(oaiCategory, oaiPlatform)) {
log.info("Found OAI Repo in resource "+epr.id()+" NAME : "+epr.profile().name()); log.info("Found OAI Repo in resource "+epr.id()+" NAME : "+epr.profile().name());
AccessPoint point=epr.profile().accessPoints().asCollection().iterator().next();
String baseUrl=epr.profile().accessPoints().asCollection().iterator().next().address(); String baseUrl=point.address();
log.debug("Address is "+baseUrl); log.debug("Address is "+baseUrl);
OAIClient client = new OAIClient(baseUrl); OAIClient client = new OAIClient(baseUrl);
point.properties().iterator().forEachRemaining((Property p)->{
if(p.name().equals("set"))
client.getSpecifiedSets().add(p.value());
});
toReturn.addAll(client.getAll(OAIClient.DC_METADATA_PREFIX)); toReturn.addAll(client.getAll(OAIClient.DC_METADATA_PREFIX));
} }