Compare commits

...

26 Commits

Author SHA1 Message Date
Miriam Baglioni f7714645d2 merge with dump 2021-03-30 16:27:38 +02:00
Miriam Baglioni 4632795f25 merge branch with master 2021-03-30 16:27:23 +02:00
Miriam Baglioni 870ee28dd6 refactoring 2021-03-30 12:55:48 +02:00
Miriam Baglioni 08f8dd9454 refactoring 2021-03-30 12:53:07 +02:00
Miriam Baglioni e5463fea01 added resource for apc dump 2021-03-30 12:47:07 +02:00
Miriam Baglioni 16c1a27852 added test for APC dump 2021-03-30 12:46:42 +02:00
Miriam Baglioni d0c94462e4 refactoring 2021-03-30 12:45:34 +02:00
Miriam Baglioni a896febc02 added APC in the dumped information 2021-03-30 11:13:07 +02:00
Miriam Baglioni 5dea729de3 added article processing charges and modified description 2021-03-30 10:49:39 +02:00
Miriam Baglioni 200e7e9c46 modified description 2021-03-30 10:49:15 +02:00
Miriam Baglioni 931b2a2e15 merge branch with master 2021-03-30 10:27:32 +02:00
Miriam Baglioni 330343937c - 2021-02-24 12:49:27 +01:00
Miriam Baglioni defbb71561 extended resource info to match the new test 2021-02-24 11:52:44 +01:00
Miriam Baglioni 17049f8bde extended the test class - should have done by the start :) 2021-02-24 11:52:07 +01:00
Miriam Baglioni cc11ee1cb9 changed the param value to directly upload on Zenodo 2021-02-24 11:51:40 +01:00
Miriam Baglioni 871e5bea29 should have fixed for real now 2021-02-24 11:51:20 +01:00
Miriam Baglioni 5d92df0627 tried again to fix issue for croatian funder 2021-02-24 10:49:55 +01:00
Miriam Baglioni 9841086ef3 modified code to split the Croazian funders 2021-02-23 18:09:14 +01:00
Miriam Baglioni d4ad740c98 merge branch with master 2021-02-23 11:10:41 +01:00
Miriam Baglioni a684e1065e merge branch with master 2021-02-23 10:45:42 +01:00
Miriam Baglioni f7c35e6311 merge branch with master 2021-02-08 10:39:10 +01:00
Miriam Baglioni 9bdadd4ddb merge branch with master 2021-01-22 11:55:27 +01:00
Miriam Baglioni 0d76e039cf changed to logic to verify in community is contained in the list of context of a result 2020-12-16 10:53:10 +01:00
Miriam Baglioni 7c86e66697 merge branch with master 2020-12-16 10:51:18 +01:00
Miriam Baglioni bc09d37e8c used constants in ModelConstants class 2020-12-10 10:01:23 +01:00
Miriam Baglioni 815c7c11aa merge branch with master 2020-12-03 11:28:01 +01:00
12 changed files with 138 additions and 25 deletions

View File

@ -4,7 +4,7 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable; import java.io.Serializable;
/** /**
* Used to refer to the Article Processing Charge information. Not dumped in this release. It contains two parameters: - * Used to refer to the Article Processing Charge information. It contains two parameters: -
* currency of type String to store the currency of the APC - amount of type String to stores the charged amount * currency of type String to store the currency of the APC - amount of type String to stores the charged amount
*/ */
public class APC implements Serializable { public class APC implements Serializable {

View File

@ -12,9 +12,11 @@ import java.util.List;
* type of type String to store the type of the instance as defined in the corresponding dnet vocabulary * type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
* (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - url of type * (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - url of type
* List<String> list of locations where the instance is accessible. It corresponds to url of the instance to be dumped - * List<String> list of locations where the instance is accessible. It corresponds to url of the instance to be dumped -
* publicationdate of type String to store the publication date of the instance ;// dateofacceptance; - refereed of type * publicationdate of type String to store the publication date of the instance ;// dateofacceptance;
* String to store information abour tthe review status of the instance. Possible values are 'Unknown', * - refereed of type
* String to store information abour the review status of the instance. Possible values are 'Unknown',
* 'nonPeerReviewed', 'peerReviewed'. It corresponds to refereed.classname of the instance to be dumped * 'nonPeerReviewed', 'peerReviewed'. It corresponds to refereed.classname of the instance to be dumped
* - articleprocessingcharge of type APC to store the article processing charges possibly associated to the instance
*/ */
public class Instance implements Serializable { public class Instance implements Serializable {
@ -28,6 +30,8 @@ public class Instance implements Serializable {
private String publicationdate;// dateofacceptance; private String publicationdate;// dateofacceptance;
private APC articleprocessingcharge;
private String refereed; // peer-review status private String refereed; // peer-review status
public String getLicense() { public String getLicense() {
@ -78,4 +82,11 @@ public class Instance implements Serializable {
this.refereed = refereed; this.refereed = refereed;
} }
public APC getArticleprocessingcharge() {
return articleprocessingcharge;
}
public void setArticleprocessingcharge(APC articleprocessingcharge) {
this.articleprocessingcharge = articleprocessingcharge;
}
} }

View File

@ -24,8 +24,6 @@ public class Constants {
public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative"; public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
public static String ORCID = "orcid";
static { static {
accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("OPEN", "c_abf2");
accessRightsCoarMap.put("RESTRICTED", "c_16ec"); accessRightsCoarMap.put("RESTRICTED", "c_16ec");

View File

@ -424,6 +424,14 @@ public class ResultMapper implements Serializable {
.ifPresent(value -> instance.setType(value.getClassname())); .ifPresent(value -> instance.setType(value.getClassname()));
Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));
Optional<Field<String>> oPca = Optional.ofNullable(i.getProcessingchargeamount());
Optional<Field<String>> oPcc = Optional.ofNullable(i.getProcessingchargecurrency());
if (oPca.isPresent() && oPcc.isPresent()) {
APC apc = new APC();
apc.setCurrency(oPcc.get().getValue());
apc.setAmount(oPca.get().getValue());
instance.setArticleprocessingcharge(apc);
}
} }
private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) { private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) {
@ -503,7 +511,7 @@ public class ResultMapper implements Serializable {
private static Pid getOrcid(List<StructuredProperty> p) { private static Pid getOrcid(List<StructuredProperty> p) {
for (StructuredProperty pid : p) { for (StructuredProperty pid : p) {
if (pid.getQualifier().getClassid().equals(Constants.ORCID)) { if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo()); Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo());
if (di.isPresent()) { if (di.isPresent()) {
return Pid return Pid

View File

@ -57,16 +57,12 @@ public class CommunitySplit implements Serializable {
Dataset<CommunityResult> community_products = result Dataset<CommunityResult> community_products = result
.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c)); .filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));
try {
community_products.first();
community_products
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath + "/" + c);
} catch (Exception e) {
} community_products
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath + "/" + c);
} }
@ -75,9 +71,9 @@ public class CommunitySplit implements Serializable {
return r return r
.getContext() .getContext()
.stream() .stream()
.filter(con -> con.getCode().equals(c)) .map(con -> con.getCode())
.collect(Collectors.toList()) .collect(Collectors.toList())
.size() > 0; .contains(c);
} }
return false; return false;
} }

View File

@ -102,19 +102,26 @@ public class SparkDumpFunderResults implements Serializable {
} else { } else {
funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase(); funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase();
} }
writeFunderResult(funder, result, outputPath + "/" + funderdump); writeFunderResult(funder, result, outputPath, funderdump);
}); });
} }
private static void writeFunderResult(String funder, Dataset<CommunityResult> results, String outputPath) { private static void dumpResults(String nsp, Dataset<CommunityResult> results, String outputPath,
String funderName) {
results.map((MapFunction<CommunityResult, CommunityResult>) r -> { results.map((MapFunction<CommunityResult, CommunityResult>) r -> {
if (!Optional.ofNullable(r.getProjects()).isPresent()) { if (!Optional.ofNullable(r.getProjects()).isPresent()) {
return null; return null;
} }
for (Project p : r.getProjects()) { for (Project p : r.getProjects()) {
if (p.getId().startsWith(funder)) { if (p.getId().startsWith(nsp)) {
if (nsp.startsWith("40|irb")) {
if (p.getFunder().getShortName().equals(funderName))
return r;
else
return null;
}
return r; return r;
} }
} }
@ -124,7 +131,18 @@ public class SparkDumpFunderResults implements Serializable {
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.json(outputPath); .json(outputPath + "/" + funderName);
}
private static void writeFunderResult(String funder, Dataset<CommunityResult> results, String outputPath,
String funderDump) {
if (funder.startsWith("40|irb")) {
dumpResults(funder, results, outputPath, "HRZZ");
dumpResults(funder, results, outputPath, "MZOS");
} else
dumpResults(funder, results, outputPath, funderDump);
} }
} }

View File

@ -1,9 +1,8 @@
<workflow-app name="dump_funder_results" xmlns="uri:oozie:workflow:0.5"> <workflow-app name="dump_funder_results" xmlns="uri:oozie:workflow:0.5">
<parameters> <parameters>
<property> <property>
<name>upload</name> <name>upload</name>
<value>false</value> <value>true</value>
<description>true to upload the dump for the funders in Zenodo</description> <description>true to upload the dump for the funders in Zenodo</description>
</property> </property>
<property> <property>

View File

@ -13,6 +13,7 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.*; import org.junit.jupiter.api.*;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -408,4 +409,54 @@ public class DumpJobTest {
} }
@Test
public void testArticlePCA() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(23, verificationDataset.count());
// verificationDataset.show(false);
Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count());
verificationDataset.createOrReplaceTempView("check");
org.apache.spark.sql.Dataset<Row> temp = spark
.sql(
"select id " +
"from check " +
"lateral view explode (instance) i as inst " +
"where inst.articleprocessingcharge is not null");
Assertions.assertTrue(temp.count() == 2);
Assertions.assertTrue(temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").count() == 1);
Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1);
//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
}
} }

View File

@ -137,5 +137,10 @@ public class SplitPerFunderTest {
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
Assertions.assertEquals(3, tmp.count()); Assertions.assertEquals(3, tmp.count());
// H2020 3
tmp = sc
.textFile(workingDir.toString() + "/split/MZOS")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
Assertions.assertEquals(1, tmp.count());
} }
} }

View File

@ -5,4 +5,5 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::8b75543067b50076e70764917e188178","subRelType":"provision","target":"40|snsf________::50cb15ff7a6a3f8531f063770179e346"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::8b75543067b50076e70764917e188178","subRelType":"provision","target":"40|snsf________::50cb15ff7a6a3f8531f063770179e346"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"40|corda_______::ffc1811633b3222e4764c7b0517f83e8"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"40|corda_______::ffc1811633b3222e4764c7b0517f83e8"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"40|nhmrc_______::4e6c928fef9851b37ec73f4f6daca35b"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"40|nhmrc_______::4e6c928fef9851b37ec73f4f6daca35b"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"40|corda__h2020::846b777af165fef7c904a81712a83b66"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"40|corda__h2020::846b777af165fef7c904a81712a83b66"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"40|irb_hr______::1e5e62235d094afd01cd56e65112fc63"}

File diff suppressed because one or more lines are too long