diff --git a/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosCollectorPlugin.java b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosCollectorPlugin.java new file mode 100644 index 0000000..fc54d78 --- /dev/null +++ b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosCollectorPlugin.java @@ -0,0 +1,17 @@ +package eu.dnetlib.data.collector.plugins.ariadneplus.thanados; + +import eu.dnetlib.data.collector.plugins.httplist.HttpListIterator; +import eu.dnetlib.rmi.data.CollectorServiceException; +import eu.dnetlib.rmi.data.InterfaceDescriptor; +import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin; + +public class ThanadosCollectorPlugin extends AbstractCollectorPlugin { + + @Override + public Iterable collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) { + final String baseUrl = interfaceDescriptor.getBaseUrl(); + final String listAddress = interfaceDescriptor.getParams().get("listUrl"); + + return () -> new ThanadosIterator(baseUrl, listAddress); + } +} diff --git a/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIterator.java b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIterator.java new file mode 100644 index 0000000..72ffa9a --- /dev/null +++ b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIterator.java @@ -0,0 +1,97 @@ +package eu.dnetlib.data.collector.plugins.ariadneplus.thanados; + +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; +import eu.dnetlib.data.collector.ThreadSafeIterator; +import eu.dnetlib.rmi.data.CollectorServiceRuntimeException; +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.http.HttpStatus; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.lang.reflect.Type; +import java.util.List; + +public class ThanadosIterator extends ThreadSafeIterator { + + private static final Log log = LogFactory.getLog(ThanadosIterator.class); + private final CloseableHttpClient client = HttpClients.createDefault(); + + private String baseUrl; + private String currentLine; + private List identifiers; + private int counter = 0; + private String urlFormat = "%s/%s?format=xml"; + + public ThanadosIterator(final String baseUrl, final String listAddress) { + try { + this.baseUrl = baseUrl; + this.identifiers = downloadIdentifierList(listAddress); + this.counter = 0; + } catch (Exception e) { + throw new CollectorServiceRuntimeException("Error creating iterator", e); + } + } + + @Override + public boolean doHasNext() { + return counter <= this.identifiers.size(); + } + + @Override + public String doNext() { + try { + if (counter <= this.identifiers.size()) { + String toDownload = String.format(urlFormat, baseUrl, identifiers.get(counter)); + log.debug("Downloading "+toDownload); + return download(toDownload); + } else { + throw new CollectorServiceRuntimeException("Iterator has reached the end"); + } + } finally { + counter++; + } + } + + protected List downloadIdentifierList(final String listUrl) { + + String list = download(listUrl); + Gson converter = new Gson(); + Type type = new TypeToken>(){}.getType(); + return converter.fromJson(list, type ); + + } + + private String download(final String url) { + log.debug("download: "+url); + final HttpGet method = new HttpGet(url); + + try(CloseableHttpResponse response = client.execute(method)) { + int statusCode = response.getStatusLine().getStatusCode(); + + if (HttpStatus.SC_OK == statusCode) { + return IOUtils.toString(new BufferedInputStream(response.getEntity().getContent()), "utf-8"); + } else { + throw new CollectorServiceRuntimeException("Error " + statusCode + " dowloading url: " + url); + } + + } catch (IOException e) { + throw new CollectorServiceRuntimeException("Error downloading url: " + url); + } + } + + + public List getIdentifiers() { + return identifiers; + } + + public void setIdentifiers(List identifiers) { + this.identifiers = identifiers; + } +} diff --git a/dnet-ariadneplus/src/test/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIteratorTest.java b/dnet-ariadneplus/src/test/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIteratorTest.java new file mode 100644 index 0000000..06acc26 --- /dev/null +++ b/dnet-ariadneplus/src/test/java/eu/dnetlib/data/collector/plugins/ariadneplus/thanados/ThanadosIteratorTest.java @@ -0,0 +1,31 @@ +package eu.dnetlib.data.collector.plugins.ariadneplus.thanados; + +import org.junit.Ignore; +import org.junit.Test; + +@Ignore +public class ThanadosIteratorTest { + + String sitelist = "https://thanados.net/sites/sitelist"; + ThanadosIterator it; + + @Test + public void testDownloadList(){ + it = new ThanadosIterator("", sitelist); + int count = 0; + it.getIdentifiers().stream().forEach(id -> System.out.println(id)); + + System.out.println(it.getIdentifiers().size()); + } + + @Test + public void testDownload(){ + int count = 0; + it = new ThanadosIterator("https://thanados.openatlas.eu/api/0.3/subunits/", sitelist); + while(it.hasNext()){ + it.next(); + count++; + } + System.out.println(count); + } +}