code formatting
This commit is contained in:
parent
3854fcc5e0
commit
6e0b6a886f
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
|
@ -130,7 +130,8 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
|
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static JavaPairRDD<Text, Text> prepareAffiliationRelationFromPublisherNewModel(SparkSession spark, String inputPath,
|
private static JavaPairRDD<Text, Text> prepareAffiliationRelationFromPublisherNewModel(SparkSession spark,
|
||||||
|
String inputPath,
|
||||||
List<KeyValue> collectedfrom) {
|
List<KeyValue> collectedfrom) {
|
||||||
|
|
||||||
Dataset<Row> df = spark
|
Dataset<Row> df = spark
|
||||||
|
@ -153,7 +154,6 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
.json(inputPath)
|
.json(inputPath)
|
||||||
.where("DOI is not null");
|
.where("DOI is not null");
|
||||||
|
|
||||||
|
|
||||||
return getTextTextJavaPairRDD(collectedfrom, df.selectExpr("DOI", "Organizations as Matchings"));
|
return getTextTextJavaPairRDD(collectedfrom, df.selectExpr("DOI", "Organizations as Matchings"));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,9 +69,12 @@ public class CollectorWorker extends ReportingJob {
|
||||||
scheduleReport(counter);
|
scheduleReport(counter);
|
||||||
|
|
||||||
try (SequenceFile.Writer writer = SequenceFile
|
try (SequenceFile.Writer writer = SequenceFile
|
||||||
.createWriter(this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
|
.createWriter(
|
||||||
.keyClass(IntWritable.class), SequenceFile.Writer
|
this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
|
||||||
.valueClass(Text.class), SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
|
.keyClass(IntWritable.class),
|
||||||
|
SequenceFile.Writer
|
||||||
|
.valueClass(Text.class),
|
||||||
|
SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
|
||||||
final IntWritable key = new IntWritable(counter.get());
|
final IntWritable key = new IntWritable(counter.get());
|
||||||
final Text value = new Text();
|
final Text value = new Text();
|
||||||
plugin
|
plugin
|
||||||
|
|
|
@ -36,7 +36,9 @@ public class OsfPreprintsCollectorPlugin implements CollectorPlugin {
|
||||||
.map(s -> NumberUtils.toInt(s, PAGE_SIZE_VALUE_DEFAULT))
|
.map(s -> NumberUtils.toInt(s, PAGE_SIZE_VALUE_DEFAULT))
|
||||||
.orElse(PAGE_SIZE_VALUE_DEFAULT);
|
.orElse(PAGE_SIZE_VALUE_DEFAULT);
|
||||||
|
|
||||||
if (StringUtils.isBlank(baseUrl)) { throw new CollectorException("Param 'baseUrl' is null or empty"); }
|
if (StringUtils.isBlank(baseUrl)) {
|
||||||
|
throw new CollectorException("Param 'baseUrl' is null or empty");
|
||||||
|
}
|
||||||
|
|
||||||
final OsfPreprintsIterator it = new OsfPreprintsIterator(baseUrl, pageSize, getClientParams());
|
final OsfPreprintsIterator it = new OsfPreprintsIterator(baseUrl, pageSize, getClientParams());
|
||||||
|
|
||||||
|
|
|
@ -54,7 +54,8 @@ public class OsfPreprintsIterator implements Iterator<String> {
|
||||||
@Override
|
@Override
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
synchronized (this.recordQueue) {
|
synchronized (this.recordQueue) {
|
||||||
while (this.recordQueue.isEmpty() && StringUtils.isNotBlank(this.currentUrl) && this.currentUrl.startsWith("http")) {
|
while (this.recordQueue.isEmpty() && StringUtils.isNotBlank(this.currentUrl)
|
||||||
|
&& this.currentUrl.startsWith("http")) {
|
||||||
try {
|
try {
|
||||||
this.currentUrl = downloadPage(this.currentUrl);
|
this.currentUrl = downloadPage(this.currentUrl);
|
||||||
} catch (final CollectorException e) {
|
} catch (final CollectorException e) {
|
||||||
|
@ -63,7 +64,9 @@ public class OsfPreprintsIterator implements Iterator<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!this.recordQueue.isEmpty()) { return true; }
|
if (!this.recordQueue.isEmpty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -112,7 +115,9 @@ public class OsfPreprintsIterator implements Iterator<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private Document downloadUrl(final String url, final int attempt) throws CollectorException {
|
private Document downloadUrl(final String url, final int attempt) throws CollectorException {
|
||||||
if (attempt > MAX_ATTEMPTS) { throw new CollectorException("Max Number of attempts reached, url:" + url); }
|
if (attempt > MAX_ATTEMPTS) {
|
||||||
|
throw new CollectorException("Max Number of attempts reached, url:" + url);
|
||||||
|
}
|
||||||
|
|
||||||
if (attempt > 0) {
|
if (attempt > 0) {
|
||||||
final int delay = (attempt * 5000);
|
final int delay = (attempt * 5000);
|
||||||
|
|
|
@ -112,7 +112,6 @@ public class PrepareAffiliationRelationsTest {
|
||||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||||
.map(aa -> ((Relation) aa.getPayload()));
|
.map(aa -> ((Relation) aa.getPayload()));
|
||||||
|
|
||||||
|
|
||||||
// count the number of relations
|
// count the number of relations
|
||||||
assertEquals(150, tmp.count());// 18 + 24 *3 + 30 * 2 =
|
assertEquals(150, tmp.count());// 18 + 24 *3 + 30 * 2 =
|
||||||
|
|
||||||
|
@ -173,7 +172,6 @@ public class PrepareAffiliationRelationsTest {
|
||||||
+ IdentifierFactory.md5("https://ror.org/03265fv13") + "'")
|
+ IdentifierFactory.md5("https://ror.org/03265fv13") + "'")
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
3, execVerification
|
3, execVerification
|
||||||
|
@ -185,6 +183,5 @@ public class PrepareAffiliationRelationsTest {
|
||||||
+ IdentifierFactory.md5("https://ror.org/00a0n9e72") + "'")
|
+ IdentifierFactory.md5("https://ror.org/00a0n9e72") + "'")
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,8 @@ public class OsfPreprintsCollectorPluginTest {
|
||||||
@Test
|
@Test
|
||||||
@Disabled
|
@Disabled
|
||||||
void test_one() throws CollectorException {
|
void test_one() throws CollectorException {
|
||||||
this.plugin.collect(this.api, new AggregatorReport())
|
this.plugin
|
||||||
|
.collect(this.api, new AggregatorReport())
|
||||||
.limit(1)
|
.limit(1)
|
||||||
.forEach(log::info);
|
.forEach(log::info);
|
||||||
}
|
}
|
||||||
|
@ -95,7 +96,8 @@ public class OsfPreprintsCollectorPluginTest {
|
||||||
final HttpConnector2 connector = new HttpConnector2();
|
final HttpConnector2 connector = new HttpConnector2();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final String res = connector.getInputSource("https://api.osf.io/v2/preprints/ydtzx/contributors/?format=json");
|
final String res = connector
|
||||||
|
.getInputSource("https://api.osf.io/v2/preprints/ydtzx/contributors/?format=json");
|
||||||
System.out.println(res);
|
System.out.println(res);
|
||||||
fail();
|
fail();
|
||||||
} catch (final Throwable e) {
|
} catch (final Throwable e) {
|
||||||
|
|
Loading…
Reference in New Issue