forked from D-Net/dnet-hadoop
[bip affiliations] consider only DOI-based records
This commit is contained in:
parent
fecbf93e0e
commit
8e7ef79ce0
|
@@ -129,7 +129,8 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
Dataset<Row> df = spark
|
Dataset<Row> df = spark
|
||||||
.read()
|
.read()
|
||||||
.schema("`DOI` STRING, `Matchings` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>")
|
.schema("`DOI` STRING, `Matchings` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>")
|
||||||
.json(inputPath);
|
.json(inputPath)
|
||||||
|
.where("DOI is not null");
|
||||||
|
|
||||||
// unroll nested arrays
|
// unroll nested arrays
|
||||||
df = df
|
df = df
|
||||||
|
|
Loading…
Reference in New Issue