forked from D-Net/dnet-hadoop
[WebCrawl] Remove relations for PIDs other than DOI
This commit is contained in:
parent
3efd5b1308
commit
143c525343
|
@ -104,8 +104,8 @@ public class CreateActionSetFromWebEntries implements Serializable {
|
||||||
final String ror = ROR_PREFIX
|
final String ror = ROR_PREFIX
|
||||||
+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
|
+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
|
||||||
ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
|
ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
|
||||||
ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
|
// ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
|
||||||
ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));
|
// ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
.iterator();
|
.iterator();
|
||||||
|
@ -139,11 +139,17 @@ public class CreateActionSetFromWebEntries implements Serializable {
|
||||||
"institution", functions
|
"institution", functions
|
||||||
.explode(
|
.explode(
|
||||||
functions.col("institutions")))
|
functions.col("institutions")))
|
||||||
|
|
||||||
.selectExpr(
|
.selectExpr(
|
||||||
"id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror",
|
"id", "doi", "institution.ror as ror",
|
||||||
"institution.country_code as country_code", "publication_year")
|
"institution.country_code as country_code", "publication_year")
|
||||||
.distinct();
|
.distinct();
|
||||||
|
|
||||||
|
// .selectExpr(
|
||||||
|
// "id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror",
|
||||||
|
// "institution.country_code as country_code", "publication_year")
|
||||||
|
// .distinct();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Dataset<Row> readBlackList(SparkSession spark, String inputPath) {
|
private static Dataset<Row> readBlackList(SparkSession spark, String inputPath) {
|
||||||
|
|
Loading…
Reference in New Issue