[webcrawl] the blacklist is now in JSON and no longer in CSV after the normalization process

This commit is contained in:
Miriam Baglioni 2024-07-25 15:20:18 +02:00
parent 7cff281d3e
commit c7f6669f1a
3 changed files with 2 additions and 4 deletions

View File

@ -148,8 +148,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
return spark
.read()
.option("header", true)
.csv(inputPath)
.json(inputPath)
.select("OpenAlexId");
}

View File

@ -1,2 +0,0 @@
DOI / PMID,OpenAlexId,Comments,
https://doi.org/10.1098/rstl.1684.0023,https://openalex.org/W2124362779,,
1 DOI / PMID OpenAlexId Comments
2 https://doi.org/10.1098/rstl.1684.0023 https://openalex.org/W2124362779

View File

@ -0,0 +1 @@
{"doi":"https://doi.org/10.1098/rstl.1684.0023","OpenAlexId":"https://openalex.org/W2124362779"}