forked from D-Net/dnet-hadoop
[gtr2 plugin] changed to try not to die if a publication link points to the website of the project
This commit is contained in:
parent
69dad7e2bf
commit
19a9bddab1
|
@ -160,7 +160,7 @@ public class ORCIDExtractor extends Thread {
|
|||
}
|
||||
} finally {
|
||||
for (SequenceFile.Writer k : fileMap.values()) {
|
||||
log.info("Thread {}: Completed processed {} items", id, extractedItem);
|
||||
log.info("Thread {}: Completed processed {} items", id, extractedItem);
|
||||
k.hflush();
|
||||
k.close();
|
||||
}
|
||||
|
|
|
@ -152,6 +152,12 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
|
|||
|
||||
} catch (final Throwable e) {
|
||||
log.error("Error dowloading url: {}, attempt = {}", cleanUrl, attempt, e);
|
||||
if(attempt == -1)
|
||||
try{
|
||||
DocumentHelper.parseText("<empty></empty>");
|
||||
}catch(Throwable t){
|
||||
throw new RuntimeException();
|
||||
}
|
||||
if (attempt >= MAX_ATTEMPTS) {
|
||||
throw new RuntimeException("Error downloading url: " + cleanUrl, e);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue