From 2bc93e53121b4314d70cad0f02f7991fd0a308ad Mon Sep 17 00:00:00 2001 From: Fabio Sinibaldi Date: Wed, 16 Dec 2020 11:29:12 +0100 Subject: [PATCH] Support to Set --- oai-harvester/CHANGELOG.md | 3 +- .../gFeed/collectors/oai/OAIClient.java | 86 +++++++++---------- .../gFeed/collectors/oai/OAICollector.java | 11 ++- 3 files changed, 52 insertions(+), 48 deletions(-) diff --git a/oai-harvester/CHANGELOG.md b/oai-harvester/CHANGELOG.md index 4ba801c..bcccc0d 100644 --- a/oai-harvester/CHANGELOG.md +++ b/oai-harvester/CHANGELOG.md @@ -4,4 +4,5 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [1.0.4-SNAPSHOT] - 2020-12-15 - Dependency management -- Naming Convention \ No newline at end of file +- Naming Convention +- Support to Set Filtering [#20342] \ No newline at end of file diff --git a/oai-harvester/src/main/java/org/gcube/data/publishing/gFeed/collectors/oai/OAIClient.java b/oai-harvester/src/main/java/org/gcube/data/publishing/gFeed/collectors/oai/OAIClient.java index f87c6a7..f42ba6b 100644 --- a/oai-harvester/src/main/java/org/gcube/data/publishing/gFeed/collectors/oai/OAIClient.java +++ b/oai-harvester/src/main/java/org/gcube/data/publishing/gFeed/collectors/oai/OAIClient.java @@ -3,6 +3,7 @@ package org.gcube.data.publishing.gFeed.collectors.oai; import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; +import java.util.List; import javax.ws.rs.client.Client; import javax.ws.rs.client.ClientBuilder; @@ -22,6 +23,7 @@ import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH; import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH.Token; import org.glassfish.jersey.client.ClientProperties; +import lombok.Getter; import lombok.NonNull; import lombok.RequiredArgsConstructor; import lombok.Setter; @@ -34,8 +36,8 @@ public class OAIClient { private static JAXBContext jaxbContext=null; private static final int MAX_ATTEMPTS=3; private static final long DELAY_FACTOR=1000; - - + + private static synchronized JAXBContext getContext() throws JAXBException { if(jaxbContext==null) jaxbContext = JAXBContext.newInstance(OAIRecord.class, @@ -56,8 +58,8 @@ public class OAIClient { @NonNull @Setter private Integer maxItems=-1; - - + + Client client; private synchronized Client getWebClient() { @@ -68,10 +70,33 @@ public class OAIClient { return client; } + @Getter + private List specifiedSets=new ArrayList(); public Collection getAll(String metadataPrefix) throws JAXBException, OAIInteractionException{ ArrayList toReturn=new ArrayList(); + + WebTarget target=getWebClient().target(baseUrl).queryParam("verb","ListRecords"); + + if(!specifiedSets.isEmpty()) + for(String set : specifiedSets) { + log.info("Loading "+metadataPrefix+" SET : "+set+" from "+baseUrl); + target.queryParam("set", set); + toReturn.addAll(call(target,metadataPrefix)); + } + else { + log.info("Loading "+metadataPrefix+" from "+baseUrl); + toReturn.addAll(call(target,metadataPrefix)); + } + + log.info("Obtained "+toReturn.size()+" from "+baseUrl); + return toReturn; + } + + + private List call(WebTarget target,String metadataPrefix){ + ArrayList toReturn=new ArrayList(); String resumptionToken=null; @@ -80,62 +105,41 @@ public class OAIClient { int currentAttempt=1; while(!isComplete) { try { - WebTarget target=getWebClient().target(baseUrl). - queryParam("verb","ListRecords"); - + if(resumptionToken==null) target=target.queryParam("metadataPrefix",metadataPrefix); else target=target.queryParam("resumptionToken", resumptionToken); - Response resp=target.request("application/xml").get(); + Response resp=target.request("application/xml").get(); - OAI_PMH msg=check(resp); - if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage()); - //No errors, thus reset attempt counter - currentAttempt=1; - toReturn.addAll(msg.getResponseRecords().getRecords()); - log.debug("Parsed "+toReturn.size()+" records so far."); + OAI_PMH msg=check(resp); + + if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage()); + //No errors, thus reset attempt counter + currentAttempt=1; + + toReturn.addAll(msg.getResponseRecords().getRecords()); + log.debug("Parsed "+toReturn.size()+" records so far."); + + Token t=msg.getResponseRecords().getResumptionToken(); log.debug("Obtained token : "+t); - + if(t!=null && t.getId()!=null && !t.getId().isEmpty()) { resumptionToken=t.getId(); }else isComplete=true; //no token = completion - - //Using limit if(maxItems>0 && toReturn.size()>=maxItems) { log.warn("MAX ITEMS LIMIT REACHED : "+toReturn.size()+" / "+maxItems); isComplete=true; } - - - -// }catch(CommunicationException e) { -// log.warn("Received communication error "+e.getMessage()); -// log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = "); -// isComplete=currentAttempt>MAX_ATTEMPTS; -// try { -// Thread.sleep(currentAttempt*DELAY_FACTOR); -// } catch (InterruptedException e1) {} -// currentAttempt++; -// -// }catch(OAIInteractionException e) { -// log.warn("Remote OAI "+baseUrl+" didn't accept request ",e); -// log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = "); -// isComplete=currentAttempt>MAX_ATTEMPTS; -// try { -// Thread.sleep(currentAttempt*DELAY_FACTOR); -// } catch (InterruptedException e1) {} -// currentAttempt++; }catch(Throwable t) { -// throw new OAIInteractionException("Unexpected error while harvesting "+baseUrl,t); log.warn("Unexpected ERROR "+t.getMessage()); log.debug("Current attempt number = "+currentAttempt," max attempt Number = "+MAX_ATTEMPTS+", attempts delay factor = "); isComplete=currentAttempt>MAX_ATTEMPTS; @@ -145,15 +149,9 @@ public class OAIClient { currentAttempt++; } } - log.trace("Obtained "+toReturn.size()+" from "+baseUrl); return toReturn; } - - private void retry() { - - } - private static OAI_PMH check(Response resp) throws JAXBException, CommunicationException { if(resp.getStatus()<200||resp.getStatus()>=300) { // exception diff --git a/oai-harvester/src/main/java/org/gcube/data/publishing/gFeed/collectors/oai/OAICollector.java b/oai-harvester/src/main/java/org/gcube/data/publishing/gFeed/collectors/oai/OAICollector.java index f09cbe0..dfe29a3 100644 --- a/oai-harvester/src/main/java/org/gcube/data/publishing/gFeed/collectors/oai/OAICollector.java +++ b/oai-harvester/src/main/java/org/gcube/data/publishing/gFeed/collectors/oai/OAICollector.java @@ -4,6 +4,8 @@ import java.util.HashSet; import java.util.Set; import org.gcube.common.resources.gcore.ServiceEndpoint; +import org.gcube.common.resources.gcore.ServiceEndpoint.AccessPoint; +import org.gcube.common.resources.gcore.ServiceEndpoint.Property; import org.gcube.data.publishing.gCatFeeder.utils.ISUtils; import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector; import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault; @@ -22,11 +24,14 @@ public class OAICollector implements DataCollector { String oaiPlatform="oai-pmh"; for(ServiceEndpoint epr:ISUtils.queryForServiceEndpoints(oaiCategory, oaiPlatform)) { log.info("Found OAI Repo in resource "+epr.id()+" NAME : "+epr.profile().name()); - - String baseUrl=epr.profile().accessPoints().asCollection().iterator().next().address(); + AccessPoint point=epr.profile().accessPoints().asCollection().iterator().next(); + String baseUrl=point.address(); log.debug("Address is "+baseUrl); OAIClient client = new OAIClient(baseUrl); - + point.properties().iterator().forEachRemaining((Property p)->{ + if(p.name().equals("set")) + client.getSpecifiedSets().add(p.value()); + }); toReturn.addAll(client.getAll(OAIClient.DC_METADATA_PREFIX)); }