Support to Set

This commit is contained in:
Fabio Sinibaldi 2020-12-16 11:29:12 +01:00
parent 15ee39bd9d
commit 2bc93e5312
3 changed files with 52 additions and 48 deletions

View File

@ -5,3 +5,4 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
## [1.0.4-SNAPSHOT] - 2020-12-15
- Dependency management
- Naming Convention
- Support for Set filtering [#20342]

View File

@ -3,6 +3,7 @@ package org.gcube.data.publishing.gFeed.collectors.oai;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
@ -22,6 +23,7 @@ import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH.Token;
import org.glassfish.jersey.client.ClientProperties;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
@ -68,11 +70,34 @@ public class OAIClient {
return client;
}
@Getter
private List<String> specifiedSets=new ArrayList<String>();
/**
 * Harvests records from the repository at {@code baseUrl} using the
 * {@code ListRecords} verb.
 *
 * When {@code specifiedSets} is not empty, one harvest is performed per
 * configured set and the results are concatenated in iteration order (no
 * de-duplication is applied across sets). When it is empty, a single
 * unfiltered harvest is performed.
 *
 * @param metadataPrefix the metadata prefix forwarded to each harvest call
 * @return all the records collected by the performed harvest(s)
 * @throws JAXBException declared by the signature for callers
 * @throws OAIInteractionException declared by the signature for callers
 */
public Collection<OAIRecord> getAll(String metadataPrefix) throws JAXBException, OAIInteractionException{
    ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>();

    // Base target shared by every request; never modified below.
    WebTarget target=getWebClient().target(baseUrl).queryParam("verb","ListRecords");

    if(!specifiedSets.isEmpty())
        for(String set : specifiedSets) {
            log.info("Loading "+metadataPrefix+" SET : "+set+" from "+baseUrl);
            // WebTarget is immutable: queryParam returns a NEW target and leaves
            // the receiver untouched. The returned instance must be used, otherwise
            // the "set" filter is silently dropped. Deriving it from the base target
            // on each iteration also keeps sets from accumulating across requests.
            WebTarget setTarget=target.queryParam("set", set);
            toReturn.addAll(call(setTarget,metadataPrefix));
        }
    else {
        log.info("Loading "+metadataPrefix+" from "+baseUrl);
        toReturn.addAll(call(target,metadataPrefix));
    }

    log.info("Obtained "+toReturn.size()+" from "+baseUrl);
    return toReturn;
}
private List<OAIRecord> call(WebTarget target,String metadataPrefix){
ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>();
String resumptionToken=null;
// call & iterate
@ -80,8 +105,6 @@ public class OAIClient {
int currentAttempt=1;
while(!isComplete) {
try {
WebTarget target=getWebClient().target(baseUrl).
queryParam("verb","ListRecords");
if(resumptionToken==null)
target=target.queryParam("metadataPrefix",metadataPrefix);
@ -89,16 +112,20 @@ public class OAIClient {
target=target.queryParam("resumptionToken", resumptionToken);
Response resp=target.request("application/xml").get();
Response resp=target.request("application/xml").get();
OAI_PMH msg=check(resp);
if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage());
//No errors, thus reset attempt counter
currentAttempt=1;
toReturn.addAll(msg.getResponseRecords().getRecords());
log.debug("Parsed "+toReturn.size()+" records so far.");
OAI_PMH msg=check(resp);
if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage());
//No errors, thus reset attempt counter
currentAttempt=1;
toReturn.addAll(msg.getResponseRecords().getRecords());
log.debug("Parsed "+toReturn.size()+" records so far.");
Token t=msg.getResponseRecords().getResumptionToken();
log.debug("Obtained token : "+t);
@ -107,35 +134,12 @@ public class OAIClient {
resumptionToken=t.getId();
}else isComplete=true; //no token = completion
//Using limit
if(maxItems>0 && toReturn.size()>=maxItems) {
log.warn("MAX ITEMS LIMIT REACHED : "+toReturn.size()+" / "+maxItems);
isComplete=true;
}
// }catch(CommunicationException e) {
// log.warn("Received communication error "+e.getMessage());
// log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
// isComplete=currentAttempt>MAX_ATTEMPTS;
// try {
// Thread.sleep(currentAttempt*DELAY_FACTOR);
// } catch (InterruptedException e1) {}
// currentAttempt++;
//
// }catch(OAIInteractionException e) {
// log.warn("Remote OAI "+baseUrl+" didn't accept request ",e);
// log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
// isComplete=currentAttempt>MAX_ATTEMPTS;
// try {
// Thread.sleep(currentAttempt*DELAY_FACTOR);
// } catch (InterruptedException e1) {}
// currentAttempt++;
}catch(Throwable t) {
// throw new OAIInteractionException("Unexpected error while harvesting "+baseUrl,t);
log.warn("Unexpected ERROR "+t.getMessage());
log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
isComplete=currentAttempt>MAX_ATTEMPTS;
@ -145,15 +149,9 @@ public class OAIClient {
currentAttempt++;
}
}
log.trace("Obtained "+toReturn.size()+" from "+baseUrl);
return toReturn;
}
private void retry() {
}
private static OAI_PMH check(Response resp) throws JAXBException, CommunicationException {
if(resp.getStatus()<200||resp.getStatus()>=300) {
// exception

View File

@ -4,6 +4,8 @@ import java.util.HashSet;
import java.util.Set;
import org.gcube.common.resources.gcore.ServiceEndpoint;
import org.gcube.common.resources.gcore.ServiceEndpoint.AccessPoint;
import org.gcube.common.resources.gcore.ServiceEndpoint.Property;
import org.gcube.data.publishing.gCatFeeder.utils.ISUtils;
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault;
@ -22,11 +24,14 @@ public class OAICollector implements DataCollector<OAIRecord> {
String oaiPlatform="oai-pmh";
for(ServiceEndpoint epr:ISUtils.queryForServiceEndpoints(oaiCategory, oaiPlatform)) {
log.info("Found OAI Repo in resource "+epr.id()+" NAME : "+epr.profile().name());
String baseUrl=epr.profile().accessPoints().asCollection().iterator().next().address();
AccessPoint point=epr.profile().accessPoints().asCollection().iterator().next();
String baseUrl=point.address();
log.debug("Address is "+baseUrl);
OAIClient client = new OAIClient(baseUrl);
point.properties().iterator().forEachRemaining((Property p)->{
if(p.name().equals("set"))
client.getSpecifiedSets().add(p.value());
});
toReturn.addAll(client.getAll(OAIClient.DC_METADATA_PREFIX));
}