Added tests verifying the dump of indicators at project and datasource level. Fixed an issue with the prefix on identifiers.

This commit is contained in:
Miriam Baglioni 2023-06-01 15:10:00 +02:00
parent 32983e90d1
commit 2e8639f22d
11 changed files with 76 additions and 67 deletions

View File

@ -1,11 +1,13 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 07/11/22
*/
public class UsageCounts {
public class UsageCounts implements Serializable {
private String downloads;
private String views;

View File

@ -1,38 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
/**
 * Represents one endpoint of a relation. Fields: {@code id} — the OpenAIRE
 * identifier of the entity, {@code type} — the kind of entity. For a relation
 * between a Result R and a Project P, the node for R carries R's id with type
 * "result", while the node for P carries the project's id with type "project".
 */
public class Node implements Serializable {
	private String id;
	private String type;

	/**
	 * Static factory building a fully-populated node.
	 *
	 * @param id   the OpenAIRE identifier of the entity
	 * @param type the entity kind (e.g. "result", "project")
	 * @return a new {@link Node} with both fields set
	 */
	public static Node newInstance(String id, String type) {
		final Node instance = new Node();
		instance.setId(id);
		instance.setType(type);
		return instance;
	}

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getType() {
		return type;
	}

	public void setType(String type) {
		this.type = type;
	}
}

View File

@ -73,6 +73,14 @@ public class Project implements Serializable {
@JsonSchema(description = "Indicators computed for this project, for example UsageCount ones")
private Indicator indicators;
public Indicator getIndicators() {
return indicators;
}
public void setIndicators(Indicator indicators) {
this.indicators = indicators;
}
public String getId() {
return id;
}

View File

@ -15,7 +15,7 @@ import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.Node;
import eu.dnetlib.dhp.oa.model.graph.RelType;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.schema.common.ModelConstants;

View File

@ -213,7 +213,7 @@ public class SparkDumpEntitiesJob implements Serializable {
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
Datasource datasource = new Datasource();
datasource.setId(d.getId());
datasource.setId(d.getId().substring(3));
Optional
.ofNullable(d.getOriginalId())
@ -337,6 +337,9 @@ public class SparkDumpEntitiesJob implements Serializable {
.ofNullable(d.getJournal())
.ifPresent(j -> datasource.setJournal(getContainer(j)));
Optional.ofNullable(d.getMeasures())
.ifPresent(m -> datasource.setIndicators(Utils.getIndicator(d.getMeasures())));
return datasource;
}
@ -503,7 +506,7 @@ public class SparkDumpEntitiesJob implements Serializable {
project.setFunding(funList);
if (Optional.ofNullable(p.getMeasures()).isPresent()) {
project.setIndicators(Utils.getIndicator(p.getMeasures()));
}
return project;
}
@ -605,7 +608,7 @@ public class SparkDumpEntitiesJob implements Serializable {
Optional
.ofNullable(org.getId())
.ifPresent(value -> organization.setId(value));
.ifPresent(value -> organization.setId(value.substring(3)));
Optional
.ofNullable(org.getPid())

View File

@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.Node;
import eu.dnetlib.dhp.oa.model.graph.RelType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;

View File

@ -24,7 +24,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.Node;
import eu.dnetlib.dhp.oa.model.graph.RelType;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;

View File

@ -6,12 +6,14 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import eu.dnetlib.dhp.oa.model.Indicator;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
@ -97,10 +99,11 @@ public class DumpOrganizationProjectDatasourceTest {
Assertions.assertEquals(15, verificationDataset.count());
verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Organization>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
//TODO write significant assertions
// verificationDataset
// .foreach(
// (ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Organization>) o -> System.out
// .println(OBJECT_MAPPER.writeValueAsString(o)));
}
@ -132,10 +135,25 @@ public class DumpOrganizationProjectDatasourceTest {
Assertions.assertEquals(12, verificationDataset.count());
verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Project>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
Assertions.assertEquals(10, verificationDataset.filter("indicators is NULL").count());
Assertions.assertEquals(2, verificationDataset.filter("indicators is not NULL").count());
Assertions.assertEquals(1, verificationDataset.filter("indicators is not NULL AND id == 'aka_________::01bb7b48e29d732a1c7bc5150b9195c4'").count());
Assertions.assertEquals(1, verificationDataset.filter("indicators is not NULL AND id == 'aka_________::9d1af21dbd0f5bc719f71553d19a6b3a'").count());
eu.dnetlib.dhp.oa.model.graph.Project p = tmp.filter(pr -> pr.getId().equals("aka_________::01bb7b48e29d732a1c7bc5150b9195c4")).first();
Assertions.assertEquals("2019",p.getIndicators().getUsageCounts().getDownloads());
Assertions.assertEquals("1804",p.getIndicators().getUsageCounts().getViews());
Assertions.assertNull(p.getIndicators().getImpactMeasures());
p = tmp.filter(pr -> pr.getId().equals("aka_________::9d1af21dbd0f5bc719f71553d19a6b3a")).first();
Assertions.assertEquals("139",p.getIndicators().getUsageCounts().getDownloads());
Assertions.assertEquals("53",p.getIndicators().getUsageCounts().getViews());
Assertions.assertNull(p.getIndicators().getImpactMeasures());
//TODO write significant assertions
// verificationDataset
// .foreach(
// (ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Project>) o -> System.out
// .println(OBJECT_MAPPER.writeValueAsString(o)));
}
@ -166,10 +184,26 @@ public class DumpOrganizationProjectDatasourceTest {
Assertions.assertEquals(5, verificationDataset.count());
verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Datasource>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
Assertions.assertEquals(3, verificationDataset.filter("indicators is NULL").count());
Assertions.assertEquals(2, verificationDataset.filter("indicators is not NULL").count());
Assertions.assertEquals(1, verificationDataset.filter("indicators is not NULL AND id == 'doajarticles::1fa6859d71faa77b32d82f278c6ed1df'").count());
Assertions.assertEquals(1, verificationDataset.filter("indicators is not NULL AND id == 'doajarticles::9c4b678901e5276d9e3addee566816af'").count());
eu.dnetlib.dhp.oa.model.graph.Datasource p = tmp.filter(pr -> pr.getId().equals("doajarticles::1fa6859d71faa77b32d82f278c6ed1df")).first();
Assertions.assertEquals("47542",p.getIndicators().getUsageCounts().getDownloads());
Assertions.assertEquals("36485",p.getIndicators().getUsageCounts().getViews());
Assertions.assertNull(p.getIndicators().getImpactMeasures());
p = tmp.filter(pr -> pr.getId().equals("doajarticles::9c4b678901e5276d9e3addee566816af")).first();
Assertions.assertEquals("981357",p.getIndicators().getUsageCounts().getDownloads());
Assertions.assertEquals("646539",p.getIndicators().getUsageCounts().getViews());
Assertions.assertNull(p.getIndicators().getImpactMeasures());
//TODO write significant assertions
// verificationDataset
// .foreach(
// (ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Datasource>) o -> System.out
// .println(OBJECT_MAPPER.writeValueAsString(o)));
}
@Test
@ -199,10 +233,11 @@ public class DumpOrganizationProjectDatasourceTest {
Assertions.assertEquals(1, verificationDataset.count());
verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Datasource>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
//TODO write significant assertions
// verificationDataset
// .foreach(
// (ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Datasource>) o -> System.out
// .println(OBJECT_MAPPER.writeValueAsString(o)));
}
}

View File

@ -472,7 +472,6 @@ public class DumpSubsetTest {
.textFile(workingDir.toString() + "/dump/relation")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Relation.class));
Assertions.assertEquals(10, tmp.count());
Assertions.assertEquals(5, tmp.filter(r -> r.getSourceType().equals("context")).count());
@ -515,7 +514,7 @@ public class DumpSubsetTest {
Assertions.assertEquals(102, tmp.count());
Assertions.assertEquals(51, tmp.filter(r -> r.getSourceType().equals("result") ).count());
Assertions.assertEquals(51, tmp.filter(r -> r.getSourceType().equals("result")).count());
Assertions.assertEquals(39, tmp.filter(r -> r.getSourceType().equals("datasource")).count());
Assertions.assertEquals(12, tmp.filter(r -> r.getSourceType().equals("context")).count());
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long