Support to Set
This commit is contained in:
parent
15ee39bd9d
commit
2bc93e5312
|
@ -5,3 +5,4 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
|
||||||
## [1.0.4-SNAPSHOT] - 2020-12-15
|
## [1.0.4-SNAPSHOT] - 2020-12-15
|
||||||
- Dependency management
|
- Dependency management
|
||||||
- Naming Convention
|
- Naming Convention
|
||||||
|
- Support for Set filtering [#20342]
|
|
@ -3,6 +3,7 @@ package org.gcube.data.publishing.gFeed.collectors.oai;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import javax.ws.rs.client.Client;
|
import javax.ws.rs.client.Client;
|
||||||
import javax.ws.rs.client.ClientBuilder;
|
import javax.ws.rs.client.ClientBuilder;
|
||||||
|
@ -22,6 +23,7 @@ import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH;
|
||||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH.Token;
|
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH.Token;
|
||||||
import org.glassfish.jersey.client.ClientProperties;
|
import org.glassfish.jersey.client.ClientProperties;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
import lombok.NonNull;
|
import lombok.NonNull;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
|
@ -68,11 +70,34 @@ public class OAIClient {
|
||||||
return client;
|
return client;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private List<String> specifiedSets=new ArrayList<String>();
|
||||||
|
|
||||||
|
|
||||||
public Collection<OAIRecord> getAll(String metadataPrefix) throws JAXBException, OAIInteractionException{
|
public Collection<OAIRecord> getAll(String metadataPrefix) throws JAXBException, OAIInteractionException{
|
||||||
ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>();
|
ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>();
|
||||||
|
|
||||||
|
WebTarget target=getWebClient().target(baseUrl).queryParam("verb","ListRecords");
|
||||||
|
|
||||||
|
if(!specifiedSets.isEmpty())
|
||||||
|
for(String set : specifiedSets) {
|
||||||
|
log.info("Loading "+metadataPrefix+" SET : "+set+" from "+baseUrl);
|
||||||
|
target.queryParam("set", set);
|
||||||
|
toReturn.addAll(call(target,metadataPrefix));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
log.info("Loading "+metadataPrefix+" from "+baseUrl);
|
||||||
|
toReturn.addAll(call(target,metadataPrefix));
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Obtained "+toReturn.size()+" from "+baseUrl);
|
||||||
|
return toReturn;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private List<OAIRecord> call(WebTarget target,String metadataPrefix){
|
||||||
|
ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>();
|
||||||
|
|
||||||
String resumptionToken=null;
|
String resumptionToken=null;
|
||||||
|
|
||||||
// call & iterate
|
// call & iterate
|
||||||
|
@ -80,8 +105,6 @@ public class OAIClient {
|
||||||
int currentAttempt=1;
|
int currentAttempt=1;
|
||||||
while(!isComplete) {
|
while(!isComplete) {
|
||||||
try {
|
try {
|
||||||
WebTarget target=getWebClient().target(baseUrl).
|
|
||||||
queryParam("verb","ListRecords");
|
|
||||||
|
|
||||||
if(resumptionToken==null)
|
if(resumptionToken==null)
|
||||||
target=target.queryParam("metadataPrefix",metadataPrefix);
|
target=target.queryParam("metadataPrefix",metadataPrefix);
|
||||||
|
@ -89,16 +112,20 @@ public class OAIClient {
|
||||||
target=target.queryParam("resumptionToken", resumptionToken);
|
target=target.queryParam("resumptionToken", resumptionToken);
|
||||||
|
|
||||||
|
|
||||||
Response resp=target.request("application/xml").get();
|
Response resp=target.request("application/xml").get();
|
||||||
|
|
||||||
OAI_PMH msg=check(resp);
|
|
||||||
|
|
||||||
if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage());
|
|
||||||
//No errors, thus reset attempt counter
|
|
||||||
currentAttempt=1;
|
|
||||||
|
|
||||||
toReturn.addAll(msg.getResponseRecords().getRecords());
|
|
||||||
log.debug("Parsed "+toReturn.size()+" records so far.");
|
OAI_PMH msg=check(resp);
|
||||||
|
|
||||||
|
if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage());
|
||||||
|
//No errors, thus reset attempt counter
|
||||||
|
currentAttempt=1;
|
||||||
|
|
||||||
|
toReturn.addAll(msg.getResponseRecords().getRecords());
|
||||||
|
log.debug("Parsed "+toReturn.size()+" records so far.");
|
||||||
|
|
||||||
|
|
||||||
Token t=msg.getResponseRecords().getResumptionToken();
|
Token t=msg.getResponseRecords().getResumptionToken();
|
||||||
log.debug("Obtained token : "+t);
|
log.debug("Obtained token : "+t);
|
||||||
|
@ -107,35 +134,12 @@ public class OAIClient {
|
||||||
resumptionToken=t.getId();
|
resumptionToken=t.getId();
|
||||||
}else isComplete=true; //no token = completion
|
}else isComplete=true; //no token = completion
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//Using limit
|
//Using limit
|
||||||
if(maxItems>0 && toReturn.size()>=maxItems) {
|
if(maxItems>0 && toReturn.size()>=maxItems) {
|
||||||
log.warn("MAX ITEMS LIMIT REACHED : "+toReturn.size()+" / "+maxItems);
|
log.warn("MAX ITEMS LIMIT REACHED : "+toReturn.size()+" / "+maxItems);
|
||||||
isComplete=true;
|
isComplete=true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// }catch(CommunicationException e) {
|
|
||||||
// log.warn("Received communication error "+e.getMessage());
|
|
||||||
// log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
|
|
||||||
// isComplete=currentAttempt>MAX_ATTEMPTS;
|
|
||||||
// try {
|
|
||||||
// Thread.sleep(currentAttempt*DELAY_FACTOR);
|
|
||||||
// } catch (InterruptedException e1) {}
|
|
||||||
// currentAttempt++;
|
|
||||||
//
|
|
||||||
// }catch(OAIInteractionException e) {
|
|
||||||
// log.warn("Remote OAI "+baseUrl+" didn't accept request ",e);
|
|
||||||
// log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
|
|
||||||
// isComplete=currentAttempt>MAX_ATTEMPTS;
|
|
||||||
// try {
|
|
||||||
// Thread.sleep(currentAttempt*DELAY_FACTOR);
|
|
||||||
// } catch (InterruptedException e1) {}
|
|
||||||
// currentAttempt++;
|
|
||||||
}catch(Throwable t) {
|
}catch(Throwable t) {
|
||||||
// throw new OAIInteractionException("Unexpected error while harvesting "+baseUrl,t);
|
|
||||||
log.warn("Unexpected ERROR "+t.getMessage());
|
log.warn("Unexpected ERROR "+t.getMessage());
|
||||||
log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
|
log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = ");
|
||||||
isComplete=currentAttempt>MAX_ATTEMPTS;
|
isComplete=currentAttempt>MAX_ATTEMPTS;
|
||||||
|
@ -145,15 +149,9 @@ public class OAIClient {
|
||||||
currentAttempt++;
|
currentAttempt++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
log.trace("Obtained "+toReturn.size()+" from "+baseUrl);
|
|
||||||
return toReturn;
|
return toReturn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void retry() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static OAI_PMH check(Response resp) throws JAXBException, CommunicationException {
|
private static OAI_PMH check(Response resp) throws JAXBException, CommunicationException {
|
||||||
if(resp.getStatus()<200||resp.getStatus()>=300) {
|
if(resp.getStatus()<200||resp.getStatus()>=300) {
|
||||||
// exception
|
// exception
|
||||||
|
|
|
@ -4,6 +4,8 @@ import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.gcube.common.resources.gcore.ServiceEndpoint;
|
import org.gcube.common.resources.gcore.ServiceEndpoint;
|
||||||
|
import org.gcube.common.resources.gcore.ServiceEndpoint.AccessPoint;
|
||||||
|
import org.gcube.common.resources.gcore.ServiceEndpoint.Property;
|
||||||
import org.gcube.data.publishing.gCatFeeder.utils.ISUtils;
|
import org.gcube.data.publishing.gCatFeeder.utils.ISUtils;
|
||||||
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
|
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
|
||||||
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault;
|
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault;
|
||||||
|
@ -22,11 +24,14 @@ public class OAICollector implements DataCollector<OAIRecord> {
|
||||||
String oaiPlatform="oai-pmh";
|
String oaiPlatform="oai-pmh";
|
||||||
for(ServiceEndpoint epr:ISUtils.queryForServiceEndpoints(oaiCategory, oaiPlatform)) {
|
for(ServiceEndpoint epr:ISUtils.queryForServiceEndpoints(oaiCategory, oaiPlatform)) {
|
||||||
log.info("Found OAI Repo in resource "+epr.id()+" NAME : "+epr.profile().name());
|
log.info("Found OAI Repo in resource "+epr.id()+" NAME : "+epr.profile().name());
|
||||||
|
AccessPoint point=epr.profile().accessPoints().asCollection().iterator().next();
|
||||||
String baseUrl=epr.profile().accessPoints().asCollection().iterator().next().address();
|
String baseUrl=point.address();
|
||||||
log.debug("Address is "+baseUrl);
|
log.debug("Address is "+baseUrl);
|
||||||
OAIClient client = new OAIClient(baseUrl);
|
OAIClient client = new OAIClient(baseUrl);
|
||||||
|
point.properties().iterator().forEachRemaining((Property p)->{
|
||||||
|
if(p.name().equals("set"))
|
||||||
|
client.getSpecifiedSets().add(p.value());
|
||||||
|
});
|
||||||
toReturn.addAll(client.getAll(OAIClient.DC_METADATA_PREFIX));
|
toReturn.addAll(client.getAll(OAIClient.DC_METADATA_PREFIX));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue