1
0
Fork 0

Merge pull request '[WebCrawlAffiliation] Remove the relations for PMC and PMID from the creation of the action set. Only DOIs are allowed' (#462) from affiliationFromWebCrawlOnlyDOI into beta

Reviewed-on: D-Net/dnet-hadoop#462
This commit is contained in:
Claudio Atzori 2024-07-17 11:12:32 +02:00
commit e39e8bbd47
1 changed file with 3 additions and 3 deletions

View File

@ -105,8 +105,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
final String ror = ROR_PREFIX final String ror = ROR_PREFIX
+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror"))); + IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror)); ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));
return ret return ret
.iterator(); .iterator();
@ -140,8 +139,9 @@ public class CreateActionSetFromWebEntries implements Serializable {
"institution", functions "institution", functions
.explode( .explode(
functions.col("institutions"))) functions.col("institutions")))
.selectExpr( .selectExpr(
"id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror", "id", "doi", "institution.ror as ror",
"institution.country_code as country_code", "publication_year") "institution.country_code as country_code", "publication_year")
.distinct(); .distinct();