forked from D-Net/dnet-hadoop
[AffRo] refactoring
This commit is contained in:
parent
a54d021c37
commit
7cd8171268
|
@ -143,11 +143,11 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
|
||||
Dataset<Row> df = spark
|
||||
.read()
|
||||
.schema("`DOI` STRING, `Organizations` ARRAY<STRUCT<`PID`:STRING, `Value`:STRING,`Confidence`:DOUBLE, `Status`:STRING>>")
|
||||
.schema(
|
||||
"`DOI` STRING, `Organizations` ARRAY<STRUCT<`PID`:STRING, `Value`:STRING,`Confidence`:DOUBLE, `Status`:STRING>>")
|
||||
.json(inputPath)
|
||||
.where("DOI is not null");
|
||||
|
||||
|
||||
return getTextTextJavaPairRDD(collectedfrom, df.selectExpr("DOI", "Organizations as Matchings"));
|
||||
|
||||
}
|
||||
|
@ -158,11 +158,11 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
// load and parse affiliation relations from HDFS
|
||||
Dataset<Row> df = spark
|
||||
.read()
|
||||
.schema("`DOI` STRING, `Matchings` ARRAY<STRUCT<`PID`:STRING, `Value`:STRING,`Confidence`:DOUBLE, `Status`:STRING>>")
|
||||
.schema(
|
||||
"`DOI` STRING, `Matchings` ARRAY<STRUCT<`PID`:STRING, `Value`:STRING,`Confidence`:DOUBLE, `Status`:STRING>>")
|
||||
.json(inputPath)
|
||||
.where("DOI is not null");
|
||||
|
||||
|
||||
return getTextTextJavaPairRDD(collectedfrom, df);
|
||||
}
|
||||
|
||||
|
@ -175,9 +175,8 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
new Column("matching.PID").as("pidtype"),
|
||||
new Column("matching.Value").as("pidvalue"),
|
||||
new Column("matching.Confidence").as("confidence"),
|
||||
new Column("matching.Status").as("status"))
|
||||
.where("status = 'active'");
|
||||
|
||||
new Column("matching.Status").as("status"))
|
||||
.where("status = 'active'");
|
||||
|
||||
// prepare action sets for affiliation relations
|
||||
return df
|
||||
|
@ -188,14 +187,13 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
final String paperId = ID_PREFIX
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", row.getAs("doi")));
|
||||
|
||||
|
||||
// Organization to OpenAIRE identifier
|
||||
String affId = null;
|
||||
if(row.getAs("pidtype").equals("ROR"))
|
||||
//ROR id to OpenAIRE id
|
||||
affId = GenerateRorActionSetJob.calculateOpenaireId(row.getAs("pidvalue"));
|
||||
if (row.getAs("pidtype").equals("ROR"))
|
||||
// ROR id to OpenAIRE id
|
||||
affId = GenerateRorActionSetJob.calculateOpenaireId(row.getAs("pidvalue"));
|
||||
else
|
||||
//getting the OpenOrgs identifier for the organization
|
||||
// getting the OpenOrgs identifier for the organization
|
||||
affId = row.getAs("pidvalue");
|
||||
|
||||
Qualifier qualifier = OafMapperUtils
|
||||
|
|
|
@ -517,8 +517,10 @@ case object Crossref2Oaf {
|
|||
)
|
||||
}
|
||||
|
||||
if(doi.startsWith("10.3410") || doi.startsWith("10.12703"))
|
||||
instance.setHostedby(OafMapperUtils.keyValue(OafMapperUtils.createOpenaireId(10, "openaire____::H1Connect", true),"H1Connect"))
|
||||
if (doi.startsWith("10.3410") || doi.startsWith("10.12703"))
|
||||
instance.setHostedby(
|
||||
OafMapperUtils.keyValue(OafMapperUtils.createOpenaireId(10, "openaire____::H1Connect", true), "H1Connect")
|
||||
)
|
||||
|
||||
instance.setAccessright(
|
||||
decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
|
||||
|
|
|
@ -110,12 +110,11 @@ public class PrepareAffiliationRelationsTest {
|
|||
// );
|
||||
// }
|
||||
// count the number of relations
|
||||
assertEquals(168, tmp.count());//150 +
|
||||
assertEquals(168, tmp.count());// 150 +
|
||||
|
||||
Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
|
||||
dataset.createOrReplaceTempView("result");
|
||||
|
||||
|
||||
Dataset<Row> execVerification = spark
|
||||
.sql("select r.relType, r.relClass, r.source, r.target, r.dataInfo.trust from result r");
|
||||
|
||||
|
@ -159,7 +158,15 @@ public class PrepareAffiliationRelationsTest {
|
|||
.assertEquals(
|
||||
5, execVerification.filter("source = '" + publisherid + "' and target = '" + rorId + "'").count());
|
||||
|
||||
Assertions.assertEquals(1,execVerification.filter("source = '" + ID_PREFIX
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s00217-010-1268-9")) + "' and target = '" + "20|ror_________::" + IdentifierFactory.md5("https://ror.org/03265fv13") + "'").count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, execVerification
|
||||
.filter(
|
||||
"source = '" + ID_PREFIX
|
||||
+ IdentifierFactory
|
||||
.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s00217-010-1268-9"))
|
||||
+ "' and target = '" + "20|ror_________::"
|
||||
+ IdentifierFactory.md5("https://ror.org/03265fv13") + "'")
|
||||
.count());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue