[Clean Country] changes related to D-Net/dnet-hadoop#241 (comment)

This commit is contained in:
Miriam Baglioni 2022-08-10 15:13:10 +02:00
parent 62d2138806
commit 7dbdd4a0fe
1 changed file with 3 additions and 2 deletions

View File

@@ -10,6 +10,7 @@ import java.util.stream.Collectors;
import javax.swing.text.html.Option; import javax.swing.text.html.Option;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FilterFunction;
@@ -94,7 +95,6 @@ public class CleanCountrySparkJob implements Serializable {
List<String> hostedBy = spark List<String> hostedBy = spark
.read() .read()
.textFile(datasourcePath) .textFile(datasourcePath)
// .filter((FilterFunction<String>) ds -> !ds.equals(collectedfrom))
.collectAsList(); .collectAsList();
Dataset<T> res = spark Dataset<T> res = spark
@@ -113,7 +113,8 @@ public class CleanCountrySparkJob implements Serializable {
if (r if (r
.getPid() .getPid()
.stream() .stream()
.anyMatch(p -> p.getQualifier().getClassid().equals("doi") && pidInParam(p.getValue(), verifyParam))) { .anyMatch(p -> p.getQualifier().getClassid()
.equals(PidType.doi) && pidInParam(p.getValue(), verifyParam))) {
r r
.setCountry( .setCountry(
r r