skip urls with authentication

This commit is contained in:
Michele Artini 2024-09-23 10:16:53 +02:00
parent 339d8124f2
commit 6b0f7cc8b0
2 changed files with 33 additions and 1 deletions

View File

@ -125,8 +125,13 @@ public class OsfPreprintsIterator implements Iterator<String> {
final String xml = JsonUtils.convertToXML(json);
return DocumentHelper.parseText(xml);
} catch (final Throwable e) {
log.warn(e.getMessage(), e);
if ((e instanceof CollectorException) && e.getMessage().contains("401")) {
final Element root = DocumentHelper.createElement("error_401_authorization_required");
return DocumentHelper.createDocument(root);
}
return downloadUrl(url, attempt + 1);
}
}

View File

@ -1,11 +1,15 @@
package eu.dnetlib.dhp.collection.plugin.osf;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;
import org.dom4j.DocumentHelper;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
@ -17,6 +21,7 @@ import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
public class OsfPreprintsCollectorPluginTest {
@ -50,7 +55,7 @@ public class OsfPreprintsCollectorPluginTest {
}
@Test
// @Disabled
@Disabled
void test_limited() throws CollectorException {
final AtomicInteger i = new AtomicInteger(0);
final Stream<String> stream = this.plugin.collect(this.api, new AggregatorReport());
@ -83,4 +88,26 @@ public class OsfPreprintsCollectorPluginTest {
Assertions.assertTrue(i.get() > 0);
}
@Test
// @Disabled
void test_authentication_required() {
final HttpConnector2 connector = new HttpConnector2();
try {
final String res = connector.getInputSource("https://api.osf.io/v2/preprints/ydtzx/contributors/?format=json");
System.out.println(res);
fail();
} catch (final Throwable e) {
System.out.println("**** ERROR: " + e.getMessage());
if ((e instanceof CollectorException) && e.getMessage().contains("401")) {
System.out.println(" XML: " + DocumentHelper.createDocument().getRootElement().detach());
}
assertTrue(e.getMessage().contains("401"));
}
}
}