1
0
Fork 0

code reindent + tests

This commit is contained in:
Michele Artini 2024-05-02 09:51:33 +02:00
parent 2615136efc
commit f4068de298
2 changed files with 133 additions and 100 deletions

View File

@ -65,7 +65,8 @@ public class RestIterator implements Iterator<String> {
private final int resultSizeValue;
private int resumptionInt = 0; // integer resumption token (first record to harvest)
private int resultTotal = -1;
private String resumptionStr = Integer.toString(this.resumptionInt); // string resumption token (first record to harvest
private String resumptionStr = Integer.toString(this.resumptionInt); // string resumption token (first record to
// harvest
// or token scanned from results)
private InputStream resultStream;
private Transformer transformer;
@ -82,9 +83,9 @@ public class RestIterator implements Iterator<String> {
private int discoverResultSize = 0;
private int pagination = 1;
/*
* While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in json. useful for
* cases when the target API expects a resultFormatValue != json, but the results are returned in json. An example is the EU Open Data
* Portal API: resultFormatValue=standard, results are in json format.
* While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in
* json. It is useful for cases when the target API expects a resultFormatValue != json, but the results are returned
* in json. An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format.
*/
private final String resultOutputFormat;
@ -121,7 +122,8 @@ public class RestIterator implements Iterator<String> {
this.queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue
: "";
this.querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
this.querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr
: "";
try {
initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
@ -132,7 +134,8 @@ public class RestIterator implements Iterator<String> {
initQueue();
}
private void initXmlTransformation(final String resultTotalXpath, final String resumptionXpath, final String entityXpath)
private void initXmlTransformation(final String resultTotalXpath, final String resumptionXpath,
final String entityXpath)
throws TransformerConfigurationException, XPathExpressionException {
final TransformerFactory factory = TransformerFactory.newInstance();
this.transformer = factory.newTransformer();
@ -155,7 +158,6 @@ public class RestIterator implements Iterator<String> {
/*
* (non-Javadoc)
*
* @see java.util.Iterator#hasNext()
*/
@Override
@ -169,7 +171,6 @@ public class RestIterator implements Iterator<String> {
/*
* (non-Javadoc)
*
* @see java.util.Iterator#next()
*/
@Override
@ -192,7 +193,9 @@ public class RestIterator implements Iterator<String> {
*/
private String downloadPage(String query, final int attempt) throws CollectorException {
if (attempt > MAX_ATTEMPTS) { throw new CollectorException("Max Number of attempts reached, query:" + query); }
if (attempt > MAX_ATTEMPTS) {
throw new CollectorException("Max Number of attempts reached, query:" + query);
}
if (attempt > 0) {
final int delay = (attempt * 5000);
@ -254,15 +257,19 @@ public class RestIterator implements Iterator<String> {
}
if (!(emptyXml).equalsIgnoreCase(resultXml)) {
resultNode = (Node) this.xpath.evaluate("/", new InputSource(this.resultStream), XPathConstants.NODE);
resultNode = (Node) this.xpath
.evaluate("/", new InputSource(this.resultStream), XPathConstants.NODE);
nodeList = (NodeList) this.xprEntity.evaluate(resultNode, XPathConstants.NODESET);
log.debug("nodeList.length: {}", nodeList.getLength());
for (int i = 0; i < nodeList.getLength(); i++) {
final StringWriter sw = new StringWriter();
this.transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
final String toEnqueue = sw.toString();
if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) {
log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml);
if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue)
|| emptyXml.equalsIgnoreCase(toEnqueue)) {
log
.warn(
"The following record resulted in empty item for the feeding queue: {}", resultXml);
} else {
this.recordQueue.add(sw.toString());
}
@ -283,7 +290,9 @@ public class RestIterator implements Iterator<String> {
break;
case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808)
if (this.resultSizeValue < 2) { throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2"); }
if (this.resultSizeValue < 2) {
throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2");
}
qUrlArgument = qUrl.getQuery();
final String[] arrayQUrlArgument = qUrlArgument.split("&");
for (final String arrayUrlArgStr : arrayQUrlArgument) {
@ -328,7 +337,8 @@ public class RestIterator implements Iterator<String> {
this.resumptionStr = Integer.toString(this.resumptionInt);
break;
case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor in
case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor
// in
// solr)
// isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode:
// deep-cursor, Param 'resultSizeValue' is less than 2");}
@ -340,7 +350,8 @@ public class RestIterator implements Iterator<String> {
if ((nodeList != null) && (nodeList.getLength() < this.discoverResultSize)) {
this.resumptionInt += ((nodeList.getLength() + 1) - this.resultSizeValue);
} else {
this.resumptionInt += (nodeList.getLength() - this.resultSizeValue); // subtract the resultSizeValue
this.resumptionInt += (nodeList.getLength() - this.resultSizeValue); // subtract the
// resultSizeValue
// because the iteration is over
// real length and the
// resultSizeValue is added before
@ -350,7 +361,8 @@ public class RestIterator implements Iterator<String> {
this.discoverResultSize = nodeList.getLength();
log
.debug("downloadPage().deep-cursor: resumptionStr=" + this.resumptionStr + " ; queryParams="
.debug(
"downloadPage().deep-cursor: resumptionStr=" + this.resumptionStr + " ; queryParams="
+ this.queryParams + " resumptionLengthIncreased: " + this.resumptionInt);
break;
@ -380,7 +392,8 @@ public class RestIterator implements Iterator<String> {
log.debug("resultTotal: " + this.resultTotal);
log.debug("resInt: " + this.resumptionInt);
if (this.resumptionInt <= this.resultTotal) {
nextQuery = this.baseUrl + "?" + this.queryParams + this.querySize + "&" + this.resumptionParam + "=" + this.resumptionStr
nextQuery = this.baseUrl + "?" + this.queryParams + this.querySize + "&" + this.resumptionParam + "="
+ this.resumptionStr
+ this.queryFormat;
} else {
nextQuery = "";

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.collection.plugin.rest;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions;
@ -69,7 +70,7 @@ public class OsfPreprintCollectorTest {
@Test
@Disabled
void test() throws CollectorException {
void test_limited() throws CollectorException {
final AtomicInteger i = new AtomicInteger(0);
final Stream<String> stream = this.rcp.collect(this.api, new AggregatorReport());
@ -82,4 +83,23 @@ public class OsfPreprintCollectorTest {
log.info("{}", i.intValue());
Assertions.assertTrue(i.intValue() > 0);
}
/**
 * Consumes the whole stream returned by the collector plugin for the configured API, asserting
 * that every record is a non-empty string and that at least one record was produced. Progress
 * is logged every 1000 records and the final count is logged at the end.
 *
 * @throws CollectorException if the plugin fails while collecting
 */
@Test
@Disabled
void test_all() throws CollectorException {
	final AtomicLong counter = new AtomicLong(0);

	this.rcp.collect(this.api, new AggregatorReport()).forEach(record -> {
		Assertions.assertTrue(record.length() > 0);
		final boolean atMilestone = (counter.incrementAndGet() % 1000) == 0;
		if (atMilestone) {
			log.info("COLLECTED: {}", counter.get());
		}
	});

	// The stream has been fully consumed at this point, so a single read is the final count.
	final long total = counter.get();
	log.info("TOTAL: {}", total);
	Assertions.assertTrue(total > 0);
}
}