forked from D-Net/dnet-hadoop
[webcrawl] the blacklist is now stored as JSON instead of CSV after the normalization process
This commit is contained in:
parent
7cff281d3e
commit
c7f6669f1a
|
@ -148,8 +148,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
|
|||
|
||||
return spark
|
||||
.read()
|
||||
.option("header", true)
|
||||
.csv(inputPath)
|
||||
.json(inputPath)
|
||||
.select("OpenAlexId");
|
||||
}
|
||||
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
DOI / PMID,OpenAlexId,Comments,
|
||||
https://doi.org/10.1098/rstl.1684.0023,https://openalex.org/W2124362779,,
|
|
|
@ -0,0 +1 @@
|
|||
{"doi":"https://doi.org/10.1098/rstl.1684.0023","OpenAlexId":"https://openalex.org/W2124362779"}
|
Loading…
Reference in New Issue