[gtr2 plugin] Changed to avoid failing when a publication link points to the project's website

This commit is contained in:
Miriam Baglioni 2024-12-11 16:33:51 +01:00
parent 19a9bddab1
commit ce22b1d536
2 changed files with 12 additions and 11 deletions

View File

@ -10,6 +10,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
@ -94,7 +95,6 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
this.nextElement = this.queue.poll();
}
private List<String> fetchPage(final int pageNumber) {
final List<String> res = new ArrayList<>();
@ -107,8 +107,8 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
final Element mainEntity = (Element) ((Element) po).detach();
if (filterIncremental(mainEntity)) {
final String publicationOverview =mainEntity.attributeValue("url");
res.add(loadURL(publicationOverview, 0).asXML());
final String publicationOverview = mainEntity.attributeValue("url");
res.add(loadURL(publicationOverview, -1).asXML());
} else {
log.debug("Skipped entity");
}
@ -136,7 +136,7 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
final HttpGet req = new HttpGet(cleanUrl);
req.setHeader(HttpHeaders.ACCEPT, "application/xml");
try (final CloseableHttpResponse response = client.execute(req)) {
if(endPage == Integer.MAX_VALUE)
if (endPage == Integer.MAX_VALUE)
for (final Header header : response.getAllHeaders()) {
log.debug("HEADER: " + header.getName() + " = " + header.getValue());
if ("Link-Pages".equals(header.getName())) {
@ -151,13 +151,14 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
}
} catch (final Throwable e) {
log.error("Error dowloading url: {}, attempt = {}", cleanUrl, attempt, e);
if(attempt == -1)
try{
DocumentHelper.parseText("<empty></empty>");
}catch(Throwable t){
if (attempt == -1)
try {
return DocumentHelper.parseText("<empty></empty>");
} catch (Throwable t) {
throw new RuntimeException();
}
log.error("Error dowloading url: {}, attempt = {}", cleanUrl, attempt, e);
if (attempt >= MAX_ATTEMPTS) {
throw new RuntimeException("Error downloading url: " + cleanUrl, e);
}

View File

@ -47,9 +47,9 @@ class Gtr2PublicationsIteratorTest {
@Test
@Disabled
public void testOnePage() throws Exception {
final Iterator<String> iterator = new Gtr2PublicationsIterator(baseURL, null, "12", "12", clientParams);
final Iterator<String> iterator = new Gtr2PublicationsIterator(baseURL, null, "379", "380", clientParams);
final int count = iterateAndCount(iterator);
assertEquals(20, count);
assertEquals(50, count);
}
@Test