[openaire-affiliation]removes matchings without DOI

This commit is contained in:
Miriam Baglioni 2024-08-05 12:10:40 +02:00
parent fecbf93e0e
commit 985ca15264
2 changed files with 8 additions and 1 deletions

View File

@ -129,7 +129,8 @@ public class PrepareAffiliationRelations implements Serializable {
Dataset<Row> df = spark Dataset<Row> df = spark
.read() .read()
.schema("`DOI` STRING, `Matchings` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>") .schema("`DOI` STRING, `Matchings` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>")
.json(inputPath); .json(inputPath)
.where("DOI is not NULL");
// unroll nested arrays // unroll nested arrays
df = df df = df

View File

@ -31,5 +31,11 @@ class ORCIDAuthorMatchersTest {
assertTrue(matchOrderedTokenAndAbbreviations("孙林 Sun Lin", "Sun Lin")) assertTrue(matchOrderedTokenAndAbbreviations("孙林 Sun Lin", "Sun Lin"))
// assertTrue(AuthorsMatchRevised.compare("孙林 Sun Lin", "孙林")); // not yet implemented // assertTrue(AuthorsMatchRevised.compare("孙林 Sun Lin", "孙林")); // not yet implemented
} }
@Test def testDocumentationNames(): Unit = {
assertTrue(matchOrderedTokenAndAbbreviations("James C. A. Miller-Jones", "James Antony Miller-Jones"))
}
@Test def testDocumentationNames2(): Unit = {
assertTrue(matchOrderedTokenAndAbbreviations("James C. A. Miller-Jones", "James Antony Miller Jones"))
}
} }