code reindent + tests

This commit is contained in:
Michele Artini 2024-05-02 09:51:33 +02:00
parent 2615136efc
commit f4068de298
2 changed files with 133 additions and 100 deletions

View File

@ -65,7 +65,8 @@ public class RestIterator implements Iterator<String> {
private final int resultSizeValue; private final int resultSizeValue;
private int resumptionInt = 0; // integer resumption token (first record to harvest) private int resumptionInt = 0; // integer resumption token (first record to harvest)
private int resultTotal = -1; private int resultTotal = -1;
private String resumptionStr = Integer.toString(this.resumptionInt); // string resumption token (first record to harvest private String resumptionStr = Integer.toString(this.resumptionInt); // string resumption token (first record to
// harvest
// or token scanned from results) // or token scanned from results)
private InputStream resultStream; private InputStream resultStream;
private Transformer transformer; private Transformer transformer;
@ -82,9 +83,9 @@ public class RestIterator implements Iterator<String> {
private int discoverResultSize = 0; private int discoverResultSize = 0;
private int pagination = 1; private int pagination = 1;
/* /*
* While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in json. useful for * While resultFormatValue is added to the request parameter, this is used to say that the results are retrieved in
* cases when the target API expects a resultFormatValue != json, but the results are returned in json. An example is the EU Open Data * json. useful for cases when the target API expects a resultFormatValue != json, but the results are returned in
* Portal API: resultFormatValue=standard, results are in json format. * json. An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format.
*/ */
private final String resultOutputFormat; private final String resultOutputFormat;
@ -121,7 +122,8 @@ public class RestIterator implements Iterator<String> {
this.queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue this.queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue
: ""; : "";
this.querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : ""; this.querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr
: "";
try { try {
initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath); initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
@ -132,7 +134,8 @@ public class RestIterator implements Iterator<String> {
initQueue(); initQueue();
} }
private void initXmlTransformation(final String resultTotalXpath, final String resumptionXpath, final String entityXpath) private void initXmlTransformation(final String resultTotalXpath, final String resumptionXpath,
final String entityXpath)
throws TransformerConfigurationException, XPathExpressionException { throws TransformerConfigurationException, XPathExpressionException {
final TransformerFactory factory = TransformerFactory.newInstance(); final TransformerFactory factory = TransformerFactory.newInstance();
this.transformer = factory.newTransformer(); this.transformer = factory.newTransformer();
@ -155,7 +158,6 @@ public class RestIterator implements Iterator<String> {
/* /*
* (non-Javadoc) * (non-Javadoc)
*
* @see java.util.Iterator#hasNext() * @see java.util.Iterator#hasNext()
*/ */
@Override @Override
@ -169,7 +171,6 @@ public class RestIterator implements Iterator<String> {
/* /*
* (non-Javadoc) * (non-Javadoc)
*
* @see java.util.Iterator#next() * @see java.util.Iterator#next()
*/ */
@Override @Override
@ -192,7 +193,9 @@ public class RestIterator implements Iterator<String> {
*/ */
private String downloadPage(String query, final int attempt) throws CollectorException { private String downloadPage(String query, final int attempt) throws CollectorException {
if (attempt > MAX_ATTEMPTS) { throw new CollectorException("Max Number of attempts reached, query:" + query); } if (attempt > MAX_ATTEMPTS) {
throw new CollectorException("Max Number of attempts reached, query:" + query);
}
if (attempt > 0) { if (attempt > 0) {
final int delay = (attempt * 5000); final int delay = (attempt * 5000);
@ -254,15 +257,19 @@ public class RestIterator implements Iterator<String> {
} }
if (!(emptyXml).equalsIgnoreCase(resultXml)) { if (!(emptyXml).equalsIgnoreCase(resultXml)) {
resultNode = (Node) this.xpath.evaluate("/", new InputSource(this.resultStream), XPathConstants.NODE); resultNode = (Node) this.xpath
.evaluate("/", new InputSource(this.resultStream), XPathConstants.NODE);
nodeList = (NodeList) this.xprEntity.evaluate(resultNode, XPathConstants.NODESET); nodeList = (NodeList) this.xprEntity.evaluate(resultNode, XPathConstants.NODESET);
log.debug("nodeList.length: {}", nodeList.getLength()); log.debug("nodeList.length: {}", nodeList.getLength());
for (int i = 0; i < nodeList.getLength(); i++) { for (int i = 0; i < nodeList.getLength(); i++) {
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
this.transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); this.transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
final String toEnqueue = sw.toString(); final String toEnqueue = sw.toString();
if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) { if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue)
log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml); || emptyXml.equalsIgnoreCase(toEnqueue)) {
log
.warn(
"The following record resulted in empty item for the feeding queue: {}", resultXml);
} else { } else {
this.recordQueue.add(sw.toString()); this.recordQueue.add(sw.toString());
} }
@ -283,7 +290,9 @@ public class RestIterator implements Iterator<String> {
break; break;
case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808) case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808)
if (this.resultSizeValue < 2) { throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2"); } if (this.resultSizeValue < 2) {
throw new CollectorException("Mode: discover, Param 'resultSizeValue' is less than 2");
}
qUrlArgument = qUrl.getQuery(); qUrlArgument = qUrl.getQuery();
final String[] arrayQUrlArgument = qUrlArgument.split("&"); final String[] arrayQUrlArgument = qUrlArgument.split("&");
for (final String arrayUrlArgStr : arrayQUrlArgument) { for (final String arrayUrlArgStr : arrayQUrlArgument) {
@ -328,7 +337,8 @@ public class RestIterator implements Iterator<String> {
this.resumptionStr = Integer.toString(this.resumptionInt); this.resumptionStr = Integer.toString(this.resumptionInt);
break; break;
case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor in case "deep-cursor": // size of result items unknown, iterate over items (for supporting deep cursor
// in
// solr) // solr)
// isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: // isn't relevant -- if (resultSizeValue < 2) {throw new CollectorServiceException("Mode:
// deep-cursor, Param 'resultSizeValue' is less than 2");} // deep-cursor, Param 'resultSizeValue' is less than 2");}
@ -340,7 +350,8 @@ public class RestIterator implements Iterator<String> {
if ((nodeList != null) && (nodeList.getLength() < this.discoverResultSize)) { if ((nodeList != null) && (nodeList.getLength() < this.discoverResultSize)) {
this.resumptionInt += ((nodeList.getLength() + 1) - this.resultSizeValue); this.resumptionInt += ((nodeList.getLength() + 1) - this.resultSizeValue);
} else { } else {
this.resumptionInt += (nodeList.getLength() - this.resultSizeValue); // subtract the resultSizeValue this.resumptionInt += (nodeList.getLength() - this.resultSizeValue); // subtract the
// resultSizeValue
// because the iteration is over // because the iteration is over
// real length and the // real length and the
// resultSizeValue is added before // resultSizeValue is added before
@ -350,7 +361,8 @@ public class RestIterator implements Iterator<String> {
this.discoverResultSize = nodeList.getLength(); this.discoverResultSize = nodeList.getLength();
log log
.debug("downloadPage().deep-cursor: resumptionStr=" + this.resumptionStr + " ; queryParams=" .debug(
"downloadPage().deep-cursor: resumptionStr=" + this.resumptionStr + " ; queryParams="
+ this.queryParams + " resumptionLengthIncreased: " + this.resumptionInt); + this.queryParams + " resumptionLengthIncreased: " + this.resumptionInt);
break; break;
@ -380,7 +392,8 @@ public class RestIterator implements Iterator<String> {
log.debug("resultTotal: " + this.resultTotal); log.debug("resultTotal: " + this.resultTotal);
log.debug("resInt: " + this.resumptionInt); log.debug("resInt: " + this.resumptionInt);
if (this.resumptionInt <= this.resultTotal) { if (this.resumptionInt <= this.resultTotal) {
nextQuery = this.baseUrl + "?" + this.queryParams + this.querySize + "&" + this.resumptionParam + "=" + this.resumptionStr nextQuery = this.baseUrl + "?" + this.queryParams + this.querySize + "&" + this.resumptionParam + "="
+ this.resumptionStr
+ this.queryFormat; + this.queryFormat;
} else { } else {
nextQuery = ""; nextQuery = "";

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.collection.plugin.rest;
import java.util.HashMap; import java.util.HashMap;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
@ -69,7 +70,7 @@ public class OsfPreprintCollectorTest {
@Test @Test
@Disabled @Disabled
void test() throws CollectorException { void test_limited() throws CollectorException {
final AtomicInteger i = new AtomicInteger(0); final AtomicInteger i = new AtomicInteger(0);
final Stream<String> stream = this.rcp.collect(this.api, new AggregatorReport()); final Stream<String> stream = this.rcp.collect(this.api, new AggregatorReport());
@ -82,4 +83,23 @@ public class OsfPreprintCollectorTest {
log.info("{}", i.intValue()); log.info("{}", i.intValue());
Assertions.assertTrue(i.intValue() > 0); Assertions.assertTrue(i.intValue() > 0);
} }
/**
 * Consumes the whole stream returned by the collector plugin for the configured API, checking
 * that every collected item is a non-empty string and that at least one item was received.
 * Progress is logged every 1000 items and the final count is logged at the end.
 * <p>
 * NOTE(review): the test is disabled, presumably because it depends on the remote service and
 * may run for a long time - confirm before enabling.
 *
 * @throws CollectorException if raised while starting the collection
 */
@Test
@Disabled
void test_all() throws CollectorException {
	final AtomicLong collected = new AtomicLong(0);

	this.rcp.collect(this.api, new AggregatorReport()).forEach(item -> {
		Assertions.assertTrue(item.length() > 0);

		// Emit a progress line once every 1000 collected items.
		final long seenSoFar = collected.incrementAndGet();
		if ((seenSoFar % 1000) == 0) {
			log.info("COLLECTED: {}", collected.get());
		}
	});

	final long total = collected.get();
	log.info("TOTAL: {}", total);
	Assertions.assertTrue(total > 0);
}
} }