Collector plugin for Thanados
This commit is contained in:
parent
62af1fc637
commit
305bc617ff
|
@ -0,0 +1,17 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.thanados;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.httplist.HttpListIterator;
|
||||
import eu.dnetlib.rmi.data.CollectorServiceException;
|
||||
import eu.dnetlib.rmi.data.InterfaceDescriptor;
|
||||
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
|
||||
|
||||
public class ThanadosCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) {
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
final String listAddress = interfaceDescriptor.getParams().get("listUrl");
|
||||
|
||||
return () -> new ThanadosIterator(baseUrl, listAddress);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.thanados;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
import eu.dnetlib.data.collector.ThreadSafeIterator;
|
||||
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.List;
|
||||
|
||||
public class ThanadosIterator extends ThreadSafeIterator {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ThanadosIterator.class);
|
||||
private final CloseableHttpClient client = HttpClients.createDefault();
|
||||
|
||||
private String baseUrl;
|
||||
private String currentLine;
|
||||
private List<String> identifiers;
|
||||
private int counter = 0;
|
||||
private String urlFormat = "%s/%s?format=xml";
|
||||
|
||||
public ThanadosIterator(final String baseUrl, final String listAddress) {
|
||||
try {
|
||||
this.baseUrl = baseUrl;
|
||||
this.identifiers = downloadIdentifierList(listAddress);
|
||||
this.counter = 0;
|
||||
} catch (Exception e) {
|
||||
throw new CollectorServiceRuntimeException("Error creating iterator", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean doHasNext() {
|
||||
return counter <= this.identifiers.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String doNext() {
|
||||
try {
|
||||
if (counter <= this.identifiers.size()) {
|
||||
String toDownload = String.format(urlFormat, baseUrl, identifiers.get(counter));
|
||||
log.debug("Downloading "+toDownload);
|
||||
return download(toDownload);
|
||||
} else {
|
||||
throw new CollectorServiceRuntimeException("Iterator has reached the end");
|
||||
}
|
||||
} finally {
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
|
||||
protected List<String> downloadIdentifierList(final String listUrl) {
|
||||
|
||||
String list = download(listUrl);
|
||||
Gson converter = new Gson();
|
||||
Type type = new TypeToken<List<String>>(){}.getType();
|
||||
return converter.fromJson(list, type );
|
||||
|
||||
}
|
||||
|
||||
private String download(final String url) {
|
||||
log.debug("download: "+url);
|
||||
final HttpGet method = new HttpGet(url);
|
||||
|
||||
try(CloseableHttpResponse response = client.execute(method)) {
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
|
||||
if (HttpStatus.SC_OK == statusCode) {
|
||||
return IOUtils.toString(new BufferedInputStream(response.getEntity().getContent()), "utf-8");
|
||||
} else {
|
||||
throw new CollectorServiceRuntimeException("Error " + statusCode + " dowloading url: " + url);
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new CollectorServiceRuntimeException("Error downloading url: " + url);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public List<String> getIdentifiers() {
|
||||
return identifiers;
|
||||
}
|
||||
|
||||
public void setIdentifiers(List<String> identifiers) {
|
||||
this.identifiers = identifiers;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.thanados;
|
||||
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
@Ignore
|
||||
public class ThanadosIteratorTest {
|
||||
|
||||
String sitelist = "https://thanados.net/sites/sitelist";
|
||||
ThanadosIterator it;
|
||||
|
||||
@Test
|
||||
public void testDownloadList(){
|
||||
it = new ThanadosIterator("", sitelist);
|
||||
int count = 0;
|
||||
it.getIdentifiers().stream().forEach(id -> System.out.println(id));
|
||||
|
||||
System.out.println(it.getIdentifiers().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDownload(){
|
||||
int count = 0;
|
||||
it = new ThanadosIterator("https://thanados.openatlas.eu/api/0.3/subunits/", sitelist);
|
||||
while(it.hasNext()){
|
||||
it.next();
|
||||
count++;
|
||||
}
|
||||
System.out.println(count);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue