forked from D-Net/dnet-hadoop
[Clean Country] changes related to D-Net/dnet-hadoop#241 (comment)
This commit is contained in:
parent
62d2138806
commit
7dbdd4a0fe
|
@ -10,6 +10,7 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import javax.swing.text.html.Option;
|
import javax.swing.text.html.Option;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
@ -94,7 +95,6 @@ public class CleanCountrySparkJob implements Serializable {
|
||||||
List<String> hostedBy = spark
|
List<String> hostedBy = spark
|
||||||
.read()
|
.read()
|
||||||
.textFile(datasourcePath)
|
.textFile(datasourcePath)
|
||||||
// .filter((FilterFunction<String>) ds -> !ds.equals(collectedfrom))
|
|
||||||
.collectAsList();
|
.collectAsList();
|
||||||
|
|
||||||
Dataset<T> res = spark
|
Dataset<T> res = spark
|
||||||
|
@ -113,7 +113,8 @@ public class CleanCountrySparkJob implements Serializable {
|
||||||
if (r
|
if (r
|
||||||
.getPid()
|
.getPid()
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(p -> p.getQualifier().getClassid().equals("doi") && pidInParam(p.getValue(), verifyParam))) {
|
.anyMatch(p -> p.getQualifier().getClassid()
|
||||||
|
.equals(PidType.doi) && pidInParam(p.getValue(), verifyParam))) {
|
||||||
r
|
r
|
||||||
.setCountry(
|
.setCountry(
|
||||||
r
|
r
|
||||||
|
|
Loading…
Reference in New Issue