From fa71f9a7e1b58fa8351b7ec78872e2463284f117 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 5 May 2022 16:13:32 +0200 Subject: [PATCH] Incremental support for thanados plugin --- dnet-ariadneplus/pom.xml | 5 ++++ .../thanados/ThanadosCollectorPlugin.java | 6 ++++- .../thanados/ThanadosIterator.java | 26 ++++++++++++++----- .../thanados/ThanadosIteratorTest.java | 11 ++++++-- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/dnet-ariadneplus/pom.xml b/dnet-ariadneplus/pom.xml index 3abf3a6..ce4e2e4 100644 --- a/dnet-ariadneplus/pom.xml +++ b/dnet-ariadneplus/pom.xml @@ -36,6 +36,11 @@ dnet-msro-service [7.0.0-SAXONHE-SOLR772-SNAPSHOT, 8.0.0-SAXONHE) + + net.minidev + json-smart + 2.4.8 + diff --git a/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosCollectorPlugin.java b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosCollectorPlugin.java index fc54d78..946e0a3 100644 --- a/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosCollectorPlugin.java +++ b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosCollectorPlugin.java @@ -4,6 +4,10 @@ import eu.dnetlib.data.collector.plugins.httplist.HttpListIterator; import eu.dnetlib.rmi.data.CollectorServiceException; import eu.dnetlib.rmi.data.InterfaceDescriptor; import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.time.DateUtils; + +import java.util.Date; public class ThanadosCollectorPlugin extends AbstractCollectorPlugin { @@ -12,6 +16,6 @@ public class ThanadosCollectorPlugin extends AbstractCollectorPlugin { final String baseUrl = interfaceDescriptor.getBaseUrl(); final String listAddress = interfaceDescriptor.getParams().get("listUrl"); - return () -> new ThanadosIterator(baseUrl, listAddress); + return () -> new ThanadosIterator(baseUrl, listAddress, fromDate); } } diff --git a/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIterator.java b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIterator.java index 05f1a81..1199a39 100644 --- a/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIterator.java +++ b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIterator.java @@ -5,6 +5,7 @@ import com.google.gson.reflect.TypeToken; import eu.dnetlib.data.collector.ThreadSafeIterator; import eu.dnetlib.rmi.data.CollectorServiceRuntimeException; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.http.HttpStatus; @@ -18,6 +19,11 @@ import java.io.IOException; import java.lang.reflect.Type; import java.util.List; +import net.minidev.json.JSONArray; +import net.minidev.json.JSONObject; +import net.minidev.json.parser.JSONParser; +import net.minidev.json.parser.ParseException; + public class ThanadosIterator extends ThreadSafeIterator { private static final Log log = LogFactory.getLog(ThanadosIterator.class); @@ -27,12 +33,15 @@ public class ThanadosIterator extends ThreadSafeIterator { private List identifiers; private int counter = 0; private String urlFormat = "%s/%s?format=xml"; + private String fromDate; - public ThanadosIterator(final String baseUrl, final String listAddress) { + + public ThanadosIterator(final String baseUrl, final String listAddress, final String fromDate) { try { this.baseUrl = baseUrl; - this.identifiers = downloadIdentifierList(listAddress); + this.identifiers = downloadIdentifierList(listAddress, fromDate); this.counter = 0; + this.fromDate = fromDate; } catch (Exception e) { throw new CollectorServiceRuntimeException("Error creating iterator", e); } @@ -59,12 +68,17 @@ public class ThanadosIterator extends ThreadSafeIterator { } } - protected List downloadIdentifierList(final String listUrl) { - - String list = download(listUrl); + protected List downloadIdentifierList(final String listUrl, final String fromDate) throws ParseException { + String urlToListItems = listUrl; + if(StringUtils.isNotBlank(fromDate)) + urlToListItems = listUrl+"/"+fromDate; + log.info("Getting list of items from "+urlToListItems); + String response = download(urlToListItems); + JSONObject map = (JSONObject)(new JSONParser(JSONParser.MODE_PERMISSIVE).parse(response)); + final String sites = map.getAsString("\"sites\""); Gson converter = new Gson(); Type type = new TypeToken>(){}.getType(); - return converter.fromJson(list, type ); + return converter.fromJson(sites, type ); } diff --git a/dnet-ariadneplus/src/test/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIteratorTest.java b/dnet-ariadneplus/src/test/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIteratorTest.java index 6054f20..ec2bac3 100644 --- a/dnet-ariadneplus/src/test/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIteratorTest.java +++ b/dnet-ariadneplus/src/test/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIteratorTest.java @@ -11,7 +11,7 @@ public class ThanadosIteratorTest { @Test public void testDownloadList(){ - it = new ThanadosIterator("", sitelist); + it = new ThanadosIterator("", sitelist, ""); it.getIdentifiers().stream().forEach(id -> System.out.println(id)); System.out.println(it.getIdentifiers().size()); } @@ -19,7 +19,7 @@ public class ThanadosIteratorTest { @Test public void testDownload(){ int count = 0; - it = new ThanadosIterator("https://thanados.openatlas.eu/api/0.3/subunits", sitelist); + it = new ThanadosIterator("https://thanados.openatlas.eu/api/0.3/subunits", sitelist, ""); while(it.hasNext()){ it.next(); count++; @@ -29,4 +29,11 @@ public class ThanadosIteratorTest { } + @Test + public void testIncremental(){ + it = new ThanadosIterator("", sitelist, "2022-03-12"); + it.getIdentifiers().stream().forEach(id -> System.out.println(id)); + System.out.println(it.getIdentifiers().size()); + } + }