Compare commits
151 Commits
Author | SHA1 | Date |
---|---|---|
Miriam Baglioni | 19cb62ca34 | |
Miriam Baglioni | e0a0dddfac | |
Miriam Baglioni | fcec4b4225 | |
Miriam Baglioni | ec55a22091 | |
Miriam Baglioni | d5000502ea | |
Miriam Baglioni | 47757d2836 | |
Miriam Baglioni | e4b56a4f88 | |
Claudio Atzori | 1a083b2079 | |
Claudio Atzori | 3c6efc142d | |
Miriam Baglioni | 3ad0d6edfc | |
Miriam Baglioni | 5dcb7019a5 | |
Miriam Baglioni | 253ffb42f6 | |
Miriam Baglioni | 874a5ea63e | |
Miriam Baglioni | 53257bc041 | |
Miriam Baglioni | 4f01c5b046 | |
Miriam Baglioni | aed2308299 | |
Miriam Baglioni | ac8ef53d02 | |
Miriam Baglioni | 14e2027bb0 | |
Miriam Baglioni | 4c7eb7d2c3 | |
Miriam Baglioni | 9743216062 | |
Miriam Baglioni | aefec499a9 | |
Miriam Baglioni | 08f0b1c84c | |
Miriam Baglioni | b2ca6b3bb9 | |
Miriam Baglioni | 566c1a9e4e | |
Miriam Baglioni | d170789adc | |
Miriam Baglioni | 0a2c00ce29 | |
Miriam Baglioni | cc86f24372 | |
Miriam Baglioni | d9ca135c1f | |
Miriam Baglioni | 332c02c2c1 | |
Miriam Baglioni | 998048b494 | |
Miriam Baglioni | c7eb1f7dbe | |
Miriam Baglioni | 6c0ffd9824 | |
Miriam Baglioni | e3c1ae809d | |
Miriam Baglioni | 10ea974d56 | |
Miriam Baglioni | 012d4cece6 | |
Miriam Baglioni | 818bb4b11c | |
Miriam Baglioni | e91636817c | |
Miriam Baglioni | eb407ba0d3 | |
Miriam Baglioni | 8cf6a40bdf | |
Miriam Baglioni | 176d6d7f2b | |
Miriam Baglioni | f2b890f8a8 | |
Miriam Baglioni | 5529bbe3cc | |
Miriam Baglioni | 32d64dd7a1 | |
Miriam Baglioni | 9dea4f30ca | |
Miriam Baglioni | 60e2713d56 | |
Miriam Baglioni | 9aec98cea0 | |
Miriam Baglioni | 4885d36b3b | |
Miriam Baglioni | f6677429c7 | |
Miriam Baglioni | c0ce9023a5 | |
Miriam Baglioni | d1f41b8e28 | |
Miriam Baglioni | 24be522e7c | |
Miriam Baglioni | e9aca6b702 | |
Miriam Baglioni | 5fb58362c5 | |
Miriam Baglioni | 097905171a | |
Miriam Baglioni | 6b113961c1 | |
Miriam Baglioni | a175ac2c7f | |
Miriam Baglioni | 2566b97138 | |
Miriam Baglioni | 0482648131 | |
Miriam Baglioni | 5ff50d115a | |
Miriam Baglioni | 81b55dc20b | |
Miriam Baglioni | 7ccd4e7866 | |
Miriam Baglioni | 25be584028 | |
Miriam Baglioni | 21a521b97c | |
Miriam Baglioni | b74d6f1c23 | |
Miriam Baglioni | 787d4d0b4a | |
Miriam Baglioni | b01573e201 | |
Miriam Baglioni | baef25560a | |
Miriam Baglioni | 95125d704a | |
Miriam Baglioni | abc30756e4 | |
Miriam Baglioni | ab791fe424 | |
Miriam Baglioni | 3bfac8bc6e | |
Miriam Baglioni | 9d1b708a89 | |
Miriam Baglioni | 8a44653dbe | |
Miriam Baglioni | b26fb92838 | |
Miriam Baglioni | 29b81bef26 | |
Miriam Baglioni | d53c6850aa | |
Miriam Baglioni | 3fba247c38 | |
Miriam Baglioni | 2ac5c4a9ab | |
Miriam Baglioni | 766288d1c9 | |
Miriam Baglioni | b9d4d67c72 | |
Sandro La Bruzzo | d746390b9f | |
Miriam Baglioni | 72ead1bd85 | |
Sandro La Bruzzo | 6ace388cff | |
Sandro La Bruzzo | d472050ad4 | |
Sandro La Bruzzo | 5d0d14528f | |
Miriam Baglioni | e87b790a60 | |
Miriam Baglioni | 8661bc0c90 | |
Miriam Baglioni | 2e8639f22d | |
Miriam Baglioni | 32983e90d1 | |
Miriam Baglioni | 2e0999a1df | |
Miriam Baglioni | f79b9d5c0d | |
Miriam Baglioni | 21599598ae | |
Miriam Baglioni | 66873c1744 | |
Miriam Baglioni | 7563499740 | |
Miriam Baglioni | f79c06209e | |
Miriam Baglioni | 2ed76d4662 | |
Miriam Baglioni | 44a256fc90 | |
Miriam Baglioni | 636945a5c5 | |
Miriam Baglioni | b9076f9aa8 | |
Miriam Baglioni | acb3c691bc | |
Miriam Baglioni | d0f144d422 | |
Miriam Baglioni | 1fb840ff28 | |
Miriam Baglioni | 011b7737ad | |
Miriam Baglioni | 6ba43a1b67 | |
Miriam Baglioni | 7f57f3cd1e | |
Miriam Baglioni | 1671e78e59 | |
Miriam Baglioni | 563c5d8527 | |
Miriam Baglioni | b6e0c7d660 | |
Miriam Baglioni | 43e9286db2 | |
Miriam Baglioni | 80d51cea56 | |
Miriam Baglioni | f738db860a | |
Miriam Baglioni | 4dcd03b78e | |
Miriam Baglioni | 2cae97d049 | |
Miriam Baglioni | b743dc2960 | |
Miriam Baglioni | 5e36b80dc1 | |
Miriam Baglioni | ad1ba563cd | |
Miriam Baglioni | 8ec02787f2 | |
Miriam Baglioni | 2d2b62386f | |
Miriam Baglioni | 71862838b0 | |
Miriam Baglioni | b26ecd74ea | |
Miriam Baglioni | dc5e79dc64 | |
Miriam Baglioni | 4bedecaa60 | |
Miriam Baglioni | 62d8180891 | |
Miriam Baglioni | db36a9be2e | |
Miriam Baglioni | 45cc165e92 | |
Miriam Baglioni | 0a0e2cfc9c | |
Miriam Baglioni | 054103ae70 | |
Miriam Baglioni | 99fb3dc1d0 | |
Miriam Baglioni | f26378f426 | |
Miriam Baglioni | 67d48763fa | |
Miriam Baglioni | 0bb97fead7 | |
Miriam Baglioni | d3da9ab2c6 | |
Miriam Baglioni | 8878b96204 | |
Miriam Baglioni | 447af1a851 | |
Miriam Baglioni | 956962453f | |
Miriam Baglioni | 5544b049a9 | |
Miriam Baglioni | 31ce13ffb4 | |
Miriam Baglioni | 0a53c29a8f | |
Miriam Baglioni | bdd1cfc1e0 | |
Miriam Baglioni | e222c2c4d7 | |
Miriam Baglioni | 5e8cd02acd | |
Miriam Baglioni | 4b339df43f | |
Miriam Baglioni | 3cc2802a75 | |
Miriam Baglioni | 8a574fee2a | |
Miriam Baglioni | 80e525e0c1 | |
Miriam Baglioni | 3fe35345c3 | |
Claudio Atzori | 6a4589aa2f | |
Miriam Baglioni | eb06474106 | |
Miriam Baglioni | b5ee457969 | |
Miriam Baglioni | 3905afa0c2 | |
Miriam Baglioni | e7eb17f73e |
|
@ -26,3 +26,8 @@ spark-warehouse
|
||||||
/**/*.log
|
/**/*.log
|
||||||
/**/.factorypath
|
/**/.factorypath
|
||||||
/**/.scalafmt.conf
|
/**/.scalafmt.conf
|
||||||
|
/**/job.properties
|
||||||
|
/job.properties
|
||||||
|
/*/job.properties
|
||||||
|
/*/*/job.properties
|
||||||
|
/*/*/*/job.properties
|
|
@ -0,0 +1,43 @@
|
||||||
|
# Contributor Code of Conduct
|
||||||
|
|
||||||
|
Openness, transparency and our community-driven participatory approach guide us in our day-to-day interactions and decision-making. Our open source projects are no exception. Trust, respect, collaboration and transparency are core values we believe should live and breathe within our projects. Our community welcomes participants from around the world with different experiences, unique perspectives, and great ideas to share.
|
||||||
|
|
||||||
|
## Our Pledge
|
||||||
|
|
||||||
|
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
||||||
|
|
||||||
|
## Our Standards
|
||||||
|
|
||||||
|
Examples of behavior that contributes to creating a positive environment include:
|
||||||
|
|
||||||
|
- Using welcoming and inclusive language
|
||||||
|
- Being respectful of differing viewpoints and experiences
|
||||||
|
- Gracefully accepting constructive criticism
|
||||||
|
- Attempting collaboration before conflict
|
||||||
|
- Focusing on what is best for the community
|
||||||
|
- Showing empathy towards other community members
|
||||||
|
|
||||||
|
Examples of unacceptable behavior by participants include:
|
||||||
|
|
||||||
|
- Violence, threats of violence, or inciting others to commit self-harm
|
||||||
|
- The use of sexualized language or imagery and unwelcome sexual attention or advances
|
||||||
|
- Trolling, intentionally spreading misinformation, insulting/derogatory comments, and personal or political attacks
|
||||||
|
- Public or private harassment
|
||||||
|
- Publishing others' private information, such as a physical or electronic address, without explicit permission
|
||||||
|
- Abuse of the reporting process to intentionally harass or exclude others
|
||||||
|
- Advocating for, or encouraging, any of the above behavior
|
||||||
|
- Other conduct which could reasonably be considered inappropriate in a professional setting
|
||||||
|
|
||||||
|
## Our Responsibilities
|
||||||
|
|
||||||
|
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
|
||||||
|
|
||||||
|
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
|
||||||
|
|
||||||
|
## Attribution
|
||||||
|
|
||||||
|
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), [version 1.4](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html).
|
|
@ -0,0 +1,10 @@
|
||||||
|
# Contributing to D-Net Hadoop
|
||||||
|
|
||||||
|
:+1::tada: First off, thanks for taking the time to contribute! :tada::+1:
|
||||||
|
|
||||||
|
This project and everyone participating in it is governed by our [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it).
|
||||||
|
|
||||||
|
The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules, which apply to this project as a whole, including all its sub-modules.
|
||||||
|
Use your best judgment, and feel free to propose changes to this document in a pull request.
|
||||||
|
|
||||||
|
All contributions are welcome; all contributions will be considered to be contributed under the [project license](LICENSE.md).
|
|
@ -1,2 +1,8 @@
|
||||||
# dhp-graph-dump
|
# dhp-graph-dump
|
||||||
This module defines the oozie workflows for creating & publishing the OpenAIRE Graph dumps
|
|
||||||
|
This project defines the oozie workflows for creating & publishing the OpenAIRE Graph dumps.
|
||||||
|
|
||||||
|
This project adheres to the Contributor Covenant [code of conduct](CODE_OF_CONDUCT.md).
|
||||||
|
By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it).
|
||||||
|
|
||||||
|
This project is licensed under the [AGPL v3 or later version](LICENSE.md).
|
|
@ -0,0 +1,49 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-graph-dump</artifactId>
|
||||||
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>api</artifactId>
|
||||||
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<maven.compiler.source>8</maven.compiler.source>
|
||||||
|
<maven.compiler.target>8</maven.compiler.target>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>dom4j</groupId>
|
||||||
|
<artifactId>dom4j</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>jaxen</groupId>
|
||||||
|
<artifactId>jaxen</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-annotations</artifactId>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
</project>
|
|
@ -0,0 +1,75 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.communityapi;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 06/10/23
|
||||||
|
*/
|
||||||
|
public class QueryCommunityAPI {

	// all calls go against the production deployment of the community API
	private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/";

	/**
	 * Performs a plain HTTP GET against the given URL and returns the response body.
	 *
	 * @param geturl the absolute URL to fetch
	 * @return the response body (lines trimmed and concatenated)
	 * @throws IOException if the connection fails or the server does not answer with 200 OK;
	 *         in the latter case the message carries the status code and the error body
	 */
	private static String get(String geturl) throws IOException {
		URL url = new URL(geturl);
		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
		// a GET carries no request body: do NOT enable output — with doOutput set,
		// some HttpURLConnection implementations silently switch the method to POST
		conn.setRequestMethod("GET");

		try {
			int responseCode = conn.getResponseCode();
			String body = getBody(conn);
			if (responseCode != HttpURLConnection.HTTP_OK)
				throw new IOException("Unexpected code " + responseCode + body);
			return body;
		} finally {
			// release the connection also on the error path
			conn.disconnect();
		}
	}

	/** Lists all the registered communities. */
	public static String communities() throws IOException {
		return get(PRODUCTION_BASE_URL + "community/communities");
	}

	/** Returns the profile of the community with the given identifier. */
	public static String community(String id) throws IOException {
		return get(PRODUCTION_BASE_URL + "community/" + id);
	}

	/** Returns the content providers (datasources) associated to the given community. */
	public static String communityDatasource(String id) throws IOException {
		return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
	}

	/** Returns the organizations considered for propagation for the given community. */
	public static String communityPropagationOrganization(String id) throws IOException {
		return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations");
	}

	/** Returns one page of the projects associated to the given community. */
	public static String communityProjects(String id, String page, String size) throws IOException {
		return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
	}

	/**
	 * Reads the payload of an already-opened connection.
	 * On HTTP errors (&gt;= 400) the payload is on the error stream: calling
	 * getInputStream() there would throw and the server-provided error message
	 * would be lost before the caller can report the status code.
	 */
	private static String getBody(HttpURLConnection conn) throws IOException {
		InputStream in = conn.getResponseCode() >= HttpURLConnection.HTTP_BAD_REQUEST
			? conn.getErrorStream()
			: conn.getInputStream();
		if (in == null) {
			// e.g. error responses without a body
			return "{}";
		}
		StringBuilder response = new StringBuilder();
		try (BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8"))) {
			String responseLine;
			while ((responseLine = br.readLine()) != null) {
				response.append(responseLine.trim());
			}
		}
		return response.toString();
	}

}
|
|
@ -0,0 +1,30 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.communityapi.model;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
|
||||||
|
@JsonAutoDetect
|
||||||
|
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||||
|
public class CommunityContentprovider {

	// OpenAIRE identifier of the content provider (datasource)
	private String openaireId;

	// enablement flag, kept as a String as delivered by the API
	private String enabled;

	public String getOpenaireId() {
		return openaireId;
	}

	public void setOpenaireId(final String openaireId) {
		this.openaireId = openaireId;
	}

	public String getEnabled() {
		return enabled;
	}

	public void setEnabled(String enabled) {
		this.enabled = enabled;
	}

}
|
|
@ -0,0 +1,21 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.communityapi.model;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class CommunityEntityMap extends HashMap<String, List<String>> {

	// explicit id: serializable HashMap subclass should pin its version
	private static final long serialVersionUID = 1L;

	public CommunityEntityMap() {
		super();
	}

	/**
	 * Returns the list mapped to the given key, or a fresh empty (mutable) list when
	 * the key is absent. The empty list is NOT stored in the map.
	 * <p>
	 * NOTE: this is an overload of {@link HashMap#get(Object)}, not an override — it is
	 * only selected when the argument is statically typed as String.
	 */
	public List<String> get(String key) {
		// single lookup instead of the double super.get() of the naive null-check
		return super.getOrDefault(key, new ArrayList<>());
	}
}
|
|
@ -0,0 +1,82 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.communityapi.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 06/10/23
|
||||||
|
*/
|
||||||
|
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||||
|
public class CommunityModel implements Serializable {

	// community identifier as exposed by the API
	private String id;

	// human-readable community name
	private String name;

	private String description;

	private String status;

	private String type;

	private List<String> subjects;

	// identifier of the Zenodo community associated to this community, if any
	private String zenodoCommunity;

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getDescription() {
		return description;
	}

	public void setDescription(String description) {
		this.description = description;
	}

	public String getStatus() {
		return status;
	}

	public void setStatus(String status) {
		this.status = status;
	}

	public String getType() {
		return type;
	}

	public void setType(String type) {
		this.type = type;
	}

	public List<String> getSubjects() {
		return subjects;
	}

	public void setSubjects(List<String> subjects) {
		this.subjects = subjects;
	}

	public String getZenodoCommunity() {
		return zenodoCommunity;
	}

	public void setZenodoCommunity(String zenodoCommunity) {
		this.zenodoCommunity = zenodoCommunity;
	}
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.communityapi.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 06/10/23
|
||||||
|
*/
|
||||||
|
public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
|
||||||
|
public CommunitySummary() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.communityapi.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 09/10/23
|
||||||
|
*/
|
||||||
|
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||||
|
public class ContentModel implements Serializable {
|
||||||
|
private List<ProjectModel> content;
|
||||||
|
private Integer totalPages;
|
||||||
|
private Boolean last;
|
||||||
|
private Integer number;
|
||||||
|
|
||||||
|
public List<ProjectModel> getContent() {
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContent(List<ProjectModel> content) {
|
||||||
|
this.content = content;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getTotalPages() {
|
||||||
|
return totalPages;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTotalPages(Integer totalPages) {
|
||||||
|
this.totalPages = totalPages;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Boolean getLast() {
|
||||||
|
return last;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLast(Boolean last) {
|
||||||
|
this.last = last;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getNumber() {
|
||||||
|
return number;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNumber(Integer number) {
|
||||||
|
this.number = number;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.communityapi.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
|
||||||
|
public DatasourceList() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.communityapi.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 09/10/23
|
||||||
|
*/
|
||||||
|
public class OrganizationList extends ArrayList<String> implements Serializable {

	// serializable ArrayList subclass: declare an explicit serialVersionUID
	// instead of relying on the compiler-generated one
	private static final long serialVersionUID = 1L;

	public OrganizationList() {
		super();
	}
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.communityapi.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 09/10/23
|
||||||
|
*/
|
||||||
|
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||||
|
public class ProjectModel implements Serializable {

	// OpenAIRE identifier of the project
	private String openaireId;

	private String funder;

	// field name kept as-is ("gratId", sic — presumably "grantId") since it is
	// part of the public interface and matches the serialized JSON
	private String gratId;

	public String getOpenaireId() {
		return openaireId;
	}

	public void setOpenaireId(String openaireId) {
		this.openaireId = openaireId;
	}

	public String getFunder() {
		return funder;
	}

	public void setFunder(String funder) {
		this.funder = funder;
	}

	public String getGratId() {
		return gratId;
	}

	public void setGratId(String gratId) {
		this.gratId = gratId;
	}
}
|
|
@ -40,6 +40,7 @@ public class ExecCreateSchemas {
|
||||||
.get(Paths.get(getClass().getResource("/").getPath()).toAbsolutePath() + directory)
|
.get(Paths.get(getClass().getResource("/").getPath()).toAbsolutePath() + directory)
|
||||||
.toString();
|
.toString();
|
||||||
|
|
||||||
|
System.out.println(dir);
|
||||||
if (!Files.exists(Paths.get(dir))) {
|
if (!Files.exists(Paths.get(dir))) {
|
||||||
Files.createDirectories(Paths.get(dir));
|
Files.createDirectories(Paths.get(dir));
|
||||||
}
|
}
|
||||||
|
@ -69,6 +70,5 @@ public class ExecCreateSchemas {
|
||||||
|
|
||||||
ecs.generate(CommunityResult.class, DIRECTORY, "community_result_schema.json");
|
ecs.generate(CommunityResult.class, DIRECTORY, "community_result_schema.json");
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 07/11/22
|
||||||
|
*/
|
||||||
|
public class ImpactIndicators implements Serializable {
|
||||||
|
Score influence;
|
||||||
|
Score influence_alt;
|
||||||
|
Score popularity;
|
||||||
|
Score popularity_alt;
|
||||||
|
Score impulse;
|
||||||
|
|
||||||
|
public Score getInfluence() {
|
||||||
|
return influence;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setInfluence(Score influence) {
|
||||||
|
this.influence = influence;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Score getInfluence_alt() {
|
||||||
|
return influence_alt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setInfluence_alt(Score influence_alt) {
|
||||||
|
this.influence_alt = influence_alt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Score getPopularity() {
|
||||||
|
return popularity;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPopularity(Score popularity) {
|
||||||
|
this.popularity = popularity;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Score getPopularity_alt() {
|
||||||
|
return popularity_alt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPopularity_alt(Score popularity_alt) {
|
||||||
|
this.popularity_alt = popularity_alt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Score getImpulse() {
|
||||||
|
return impulse;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setImpulse(Score impulse) {
|
||||||
|
this.impulse = impulse;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
|
public class Indicator implements Serializable {
|
||||||
|
@JsonSchema(description = "The impact measures (i.e. popularity)")
|
||||||
|
List<Score> bipIndicators;
|
||||||
|
|
||||||
|
@JsonSchema(description = "The usage counts (i.e. downloads)")
|
||||||
|
UsageCounts usageCounts;
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
public List<Score> getBipIndicators() {
|
||||||
|
return bipIndicators;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBipIndicators(List<Score> bipIndicators) {
|
||||||
|
this.bipIndicators = bipIndicators;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
public UsageCounts getUsageCounts() {
|
||||||
|
return usageCounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUsageCounts(UsageCounts usageCounts) {
|
||||||
|
this.usageCounts = usageCounts;
|
||||||
|
}
|
||||||
|
}
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.model;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -25,8 +26,8 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
*/
|
*/
|
||||||
public class Instance implements Serializable {
|
public class Instance implements Serializable {
|
||||||
|
|
||||||
@JsonSchema(description = "Measures computed for this instance, for example Bip!Finder ones")
|
// @JsonSchema(description = "Indicators computed for this instance, for example Bip!Finder ones")
|
||||||
private List<Measure> measures;
|
// private Indicator indicators;
|
||||||
|
|
||||||
private List<ResultPid> pid;
|
private List<ResultPid> pid;
|
||||||
|
|
||||||
|
@ -59,6 +60,7 @@ public class Instance implements Serializable {
|
||||||
"nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)")
|
"nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)")
|
||||||
private String refereed; // peer-review status
|
private String refereed; // peer-review status
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getLicense() {
|
public String getLicense() {
|
||||||
return license;
|
return license;
|
||||||
}
|
}
|
||||||
|
@ -67,6 +69,7 @@ public class Instance implements Serializable {
|
||||||
this.license = license;
|
this.license = license;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public AccessRight getAccessright() {
|
public AccessRight getAccessright() {
|
||||||
return accessright;
|
return accessright;
|
||||||
}
|
}
|
||||||
|
@ -75,6 +78,7 @@ public class Instance implements Serializable {
|
||||||
this.accessright = accessright;
|
this.accessright = accessright;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getType() {
|
public String getType() {
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
@ -83,6 +87,7 @@ public class Instance implements Serializable {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getUrl() {
|
public List<String> getUrl() {
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
@ -91,6 +96,7 @@ public class Instance implements Serializable {
|
||||||
this.url = url;
|
this.url = url;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getPublicationdate() {
|
public String getPublicationdate() {
|
||||||
return publicationdate;
|
return publicationdate;
|
||||||
}
|
}
|
||||||
|
@ -99,6 +105,7 @@ public class Instance implements Serializable {
|
||||||
this.publicationdate = publicationdate;
|
this.publicationdate = publicationdate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getRefereed() {
|
public String getRefereed() {
|
||||||
return refereed;
|
return refereed;
|
||||||
}
|
}
|
||||||
|
@ -107,6 +114,7 @@ public class Instance implements Serializable {
|
||||||
this.refereed = refereed;
|
this.refereed = refereed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public APC getArticleprocessingcharge() {
|
public APC getArticleprocessingcharge() {
|
||||||
return articleprocessingcharge;
|
return articleprocessingcharge;
|
||||||
}
|
}
|
||||||
|
@ -115,6 +123,7 @@ public class Instance implements Serializable {
|
||||||
this.articleprocessingcharge = articleprocessingcharge;
|
this.articleprocessingcharge = articleprocessingcharge;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<ResultPid> getPid() {
|
public List<ResultPid> getPid() {
|
||||||
return pid;
|
return pid;
|
||||||
}
|
}
|
||||||
|
@ -123,6 +132,7 @@ public class Instance implements Serializable {
|
||||||
this.pid = pid;
|
this.pid = pid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<AlternateIdentifier> getAlternateIdentifier() {
|
public List<AlternateIdentifier> getAlternateIdentifier() {
|
||||||
return alternateIdentifier;
|
return alternateIdentifier;
|
||||||
}
|
}
|
||||||
|
@ -131,11 +141,12 @@ public class Instance implements Serializable {
|
||||||
this.alternateIdentifier = alternateIdentifier;
|
this.alternateIdentifier = alternateIdentifier;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Measure> getMeasures() {
|
// @JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
return measures;
|
// public Indicator getIndicators() {
|
||||||
}
|
// return indicators;
|
||||||
|
// }
|
||||||
public void setMeasures(List<Measure> measures) {
|
//
|
||||||
this.measures = measures;
|
// public void setIndicators(Indicator indicators) {
|
||||||
}
|
// this.indicators = indicators;
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,8 +8,12 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 03/08/22
|
||||||
|
*/
|
||||||
public class Measure implements Serializable {
|
public class Measure implements Serializable {
|
||||||
@JsonSchema(description = "The measure (i.e. popularity)")
|
@JsonSchema(description = "The measure (i.e. class)")
|
||||||
private String key;
|
private String key;
|
||||||
|
|
||||||
@JsonSchema(description = "The value for that measure")
|
@JsonSchema(description = "The value for that measure")
|
||||||
|
@ -32,15 +36,14 @@ public class Measure implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Measure newInstance(String key, String value) {
|
public static Measure newInstance(String key, String value) {
|
||||||
Measure inst = new Measure();
|
Measure mes = new Measure();
|
||||||
inst.key = key;
|
mes.key = key;
|
||||||
inst.value = value;
|
mes.value = value;
|
||||||
return inst;
|
return mes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonIgnore
|
@JsonIgnore
|
||||||
public boolean isBlank() {
|
public boolean isBlank() {
|
||||||
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
|
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.model;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 19/12/23
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
* The OpenAccess color meant to be used on the result level
|
||||||
|
*/
|
||||||
|
public enum OpenAccessColor {
|
||||||
|
|
||||||
|
gold, hybrid, bronze
|
||||||
|
|
||||||
|
}
|
|
@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.model;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -73,6 +75,53 @@ public class Result implements Serializable {
|
||||||
private List<Author> author;
|
private List<Author> author;
|
||||||
|
|
||||||
// resulttype allows subclassing results into publications | datasets | software
|
// resulttype allows subclassing results into publications | datasets | software
|
||||||
|
|
||||||
|
@JsonProperty("isGreen")
|
||||||
|
@JsonSchema(description = "True if the result is green Open Access")
|
||||||
|
private Boolean isGreen;
|
||||||
|
|
||||||
|
@JsonSchema(description = "The Open Access Color of the publication")
|
||||||
|
private OpenAccessColor openAccessColor;
|
||||||
|
|
||||||
|
@JsonProperty("isInDiamondJournal")
|
||||||
|
@JsonSchema(description = "True if the result is published in a Diamond Journal")
|
||||||
|
private Boolean isInDiamondJournal;
|
||||||
|
|
||||||
|
@JsonSchema(description = "True if the result is outcome of a project")
|
||||||
|
private Boolean publiclyFunded;
|
||||||
|
|
||||||
|
public Boolean getGreen() {
|
||||||
|
return isGreen;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setGreen(Boolean green) {
|
||||||
|
isGreen = green;
|
||||||
|
}
|
||||||
|
|
||||||
|
public OpenAccessColor getOpenAccessColor() {
|
||||||
|
return openAccessColor;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOpenAccessColor(OpenAccessColor openAccessColor) {
|
||||||
|
this.openAccessColor = openAccessColor;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Boolean getInDiamondJournal() {
|
||||||
|
return isInDiamondJournal;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setInDiamondJournal(Boolean inDiamondJournal) {
|
||||||
|
isInDiamondJournal = inDiamondJournal;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Boolean getPubliclyFunded() {
|
||||||
|
return publiclyFunded;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPubliclyFunded(Boolean publiclyFunded) {
|
||||||
|
this.publiclyFunded = publiclyFunded;
|
||||||
|
}
|
||||||
|
|
||||||
@JsonSchema(
|
@JsonSchema(
|
||||||
description = "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)")
|
description = "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)")
|
||||||
private String type; // resulttype
|
private String type; // resulttype
|
||||||
|
@ -168,6 +217,19 @@ public class Result implements Serializable {
|
||||||
@JsonSchema(description = "Timestamp of last update of the record in OpenAIRE")
|
@JsonSchema(description = "Timestamp of last update of the record in OpenAIRE")
|
||||||
private Long lastupdatetimestamp;
|
private Long lastupdatetimestamp;
|
||||||
|
|
||||||
|
@JsonSchema(description = "Indicators computed for this result, for example UsageCount ones")
|
||||||
|
private Indicator indicators;
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
public Indicator getIndicators() {
|
||||||
|
return indicators;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setIndicators(Indicator indicators) {
|
||||||
|
this.indicators = indicators;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public Long getLastupdatetimestamp() {
|
public Long getLastupdatetimestamp() {
|
||||||
return lastupdatetimestamp;
|
return lastupdatetimestamp;
|
||||||
}
|
}
|
||||||
|
@ -176,6 +238,7 @@ public class Result implements Serializable {
|
||||||
this.lastupdatetimestamp = lastupdatetimestamp;
|
this.lastupdatetimestamp = lastupdatetimestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getId() {
|
public String getId() {
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
@ -184,6 +247,7 @@ public class Result implements Serializable {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getOriginalId() {
|
public List<String> getOriginalId() {
|
||||||
return originalId;
|
return originalId;
|
||||||
}
|
}
|
||||||
|
@ -192,6 +256,7 @@ public class Result implements Serializable {
|
||||||
this.originalId = originalId;
|
this.originalId = originalId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<ResultPid> getPid() {
|
public List<ResultPid> getPid() {
|
||||||
return pid;
|
return pid;
|
||||||
}
|
}
|
||||||
|
@ -200,6 +265,7 @@ public class Result implements Serializable {
|
||||||
this.pid = pid;
|
this.pid = pid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getDateofcollection() {
|
public String getDateofcollection() {
|
||||||
return dateofcollection;
|
return dateofcollection;
|
||||||
}
|
}
|
||||||
|
@ -208,10 +274,12 @@ public class Result implements Serializable {
|
||||||
this.dateofcollection = dateofcollection;
|
this.dateofcollection = dateofcollection;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<Author> getAuthor() {
|
public List<Author> getAuthor() {
|
||||||
return author;
|
return author;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getType() {
|
public String getType() {
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
@ -220,6 +288,7 @@ public class Result implements Serializable {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public Container getContainer() {
|
public Container getContainer() {
|
||||||
return container;
|
return container;
|
||||||
}
|
}
|
||||||
|
@ -232,6 +301,7 @@ public class Result implements Serializable {
|
||||||
this.author = author;
|
this.author = author;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public Language getLanguage() {
|
public Language getLanguage() {
|
||||||
return language;
|
return language;
|
||||||
}
|
}
|
||||||
|
@ -240,6 +310,7 @@ public class Result implements Serializable {
|
||||||
this.language = language;
|
this.language = language;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<ResultCountry> getCountry() {
|
public List<ResultCountry> getCountry() {
|
||||||
return country;
|
return country;
|
||||||
}
|
}
|
||||||
|
@ -248,6 +319,7 @@ public class Result implements Serializable {
|
||||||
this.country = country;
|
this.country = country;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<Subject> getSubjects() {
|
public List<Subject> getSubjects() {
|
||||||
return subjects;
|
return subjects;
|
||||||
}
|
}
|
||||||
|
@ -256,6 +328,7 @@ public class Result implements Serializable {
|
||||||
this.subjects = subjects;
|
this.subjects = subjects;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getMaintitle() {
|
public String getMaintitle() {
|
||||||
return maintitle;
|
return maintitle;
|
||||||
}
|
}
|
||||||
|
@ -264,6 +337,7 @@ public class Result implements Serializable {
|
||||||
this.maintitle = maintitle;
|
this.maintitle = maintitle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getSubtitle() {
|
public String getSubtitle() {
|
||||||
return subtitle;
|
return subtitle;
|
||||||
}
|
}
|
||||||
|
@ -272,6 +346,7 @@ public class Result implements Serializable {
|
||||||
this.subtitle = subtitle;
|
this.subtitle = subtitle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getDescription() {
|
public List<String> getDescription() {
|
||||||
return description;
|
return description;
|
||||||
}
|
}
|
||||||
|
@ -280,6 +355,7 @@ public class Result implements Serializable {
|
||||||
this.description = description;
|
this.description = description;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getPublicationdate() {
|
public String getPublicationdate() {
|
||||||
return publicationdate;
|
return publicationdate;
|
||||||
}
|
}
|
||||||
|
@ -288,6 +364,7 @@ public class Result implements Serializable {
|
||||||
this.publicationdate = publicationdate;
|
this.publicationdate = publicationdate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getPublisher() {
|
public String getPublisher() {
|
||||||
return publisher;
|
return publisher;
|
||||||
}
|
}
|
||||||
|
@ -296,6 +373,7 @@ public class Result implements Serializable {
|
||||||
this.publisher = publisher;
|
this.publisher = publisher;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getEmbargoenddate() {
|
public String getEmbargoenddate() {
|
||||||
return embargoenddate;
|
return embargoenddate;
|
||||||
}
|
}
|
||||||
|
@ -304,6 +382,7 @@ public class Result implements Serializable {
|
||||||
this.embargoenddate = embargoenddate;
|
this.embargoenddate = embargoenddate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getSource() {
|
public List<String> getSource() {
|
||||||
return source;
|
return source;
|
||||||
}
|
}
|
||||||
|
@ -312,6 +391,7 @@ public class Result implements Serializable {
|
||||||
this.source = source;
|
this.source = source;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getFormat() {
|
public List<String> getFormat() {
|
||||||
return format;
|
return format;
|
||||||
}
|
}
|
||||||
|
@ -320,6 +400,7 @@ public class Result implements Serializable {
|
||||||
this.format = format;
|
this.format = format;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getContributor() {
|
public List<String> getContributor() {
|
||||||
return contributor;
|
return contributor;
|
||||||
}
|
}
|
||||||
|
@ -328,6 +409,7 @@ public class Result implements Serializable {
|
||||||
this.contributor = contributor;
|
this.contributor = contributor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getCoverage() {
|
public List<String> getCoverage() {
|
||||||
return coverage;
|
return coverage;
|
||||||
}
|
}
|
||||||
|
@ -336,6 +418,7 @@ public class Result implements Serializable {
|
||||||
this.coverage = coverage;
|
this.coverage = coverage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public BestAccessRight getBestaccessright() {
|
public BestAccessRight getBestaccessright() {
|
||||||
return bestaccessright;
|
return bestaccessright;
|
||||||
}
|
}
|
||||||
|
@ -344,6 +427,7 @@ public class Result implements Serializable {
|
||||||
this.bestaccessright = bestaccessright;
|
this.bestaccessright = bestaccessright;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getDocumentationUrl() {
|
public List<String> getDocumentationUrl() {
|
||||||
return documentationUrl;
|
return documentationUrl;
|
||||||
}
|
}
|
||||||
|
@ -352,6 +436,7 @@ public class Result implements Serializable {
|
||||||
this.documentationUrl = documentationUrl;
|
this.documentationUrl = documentationUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getCodeRepositoryUrl() {
|
public String getCodeRepositoryUrl() {
|
||||||
return codeRepositoryUrl;
|
return codeRepositoryUrl;
|
||||||
}
|
}
|
||||||
|
@ -360,6 +445,7 @@ public class Result implements Serializable {
|
||||||
this.codeRepositoryUrl = codeRepositoryUrl;
|
this.codeRepositoryUrl = codeRepositoryUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getProgrammingLanguage() {
|
public String getProgrammingLanguage() {
|
||||||
return programmingLanguage;
|
return programmingLanguage;
|
||||||
}
|
}
|
||||||
|
@ -368,6 +454,7 @@ public class Result implements Serializable {
|
||||||
this.programmingLanguage = programmingLanguage;
|
this.programmingLanguage = programmingLanguage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getContactperson() {
|
public List<String> getContactperson() {
|
||||||
return contactperson;
|
return contactperson;
|
||||||
}
|
}
|
||||||
|
@ -376,6 +463,7 @@ public class Result implements Serializable {
|
||||||
this.contactperson = contactperson;
|
this.contactperson = contactperson;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getContactgroup() {
|
public List<String> getContactgroup() {
|
||||||
return contactgroup;
|
return contactgroup;
|
||||||
}
|
}
|
||||||
|
@ -384,6 +472,7 @@ public class Result implements Serializable {
|
||||||
this.contactgroup = contactgroup;
|
this.contactgroup = contactgroup;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<String> getTool() {
|
public List<String> getTool() {
|
||||||
return tool;
|
return tool;
|
||||||
}
|
}
|
||||||
|
@ -392,6 +481,7 @@ public class Result implements Serializable {
|
||||||
this.tool = tool;
|
this.tool = tool;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getSize() {
|
public String getSize() {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
@ -400,6 +490,7 @@ public class Result implements Serializable {
|
||||||
this.size = size;
|
this.size = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public String getVersion() {
|
public String getVersion() {
|
||||||
return version;
|
return version;
|
||||||
}
|
}
|
||||||
|
@ -408,6 +499,7 @@ public class Result implements Serializable {
|
||||||
this.version = version;
|
this.version = version;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<GeoLocation> getGeolocation() {
|
public List<GeoLocation> getGeolocation() {
|
||||||
return geolocation;
|
return geolocation;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonGetter;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonSetter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 07/11/22
|
||||||
|
*/
|
||||||
|
public class Score implements Serializable {
|
||||||
|
private String indicator;
|
||||||
|
private String score;
|
||||||
|
|
||||||
|
@JsonProperty("class")
|
||||||
|
private String clazz;
|
||||||
|
|
||||||
|
public String getScore() {
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setScore(String score) {
|
||||||
|
this.score = score;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonGetter("class")
|
||||||
|
public String getClazz() {
|
||||||
|
return clazz;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonSetter("class")
|
||||||
|
public void setClazz(String clazz) {
|
||||||
|
this.clazz = clazz;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getIndicator() {
|
||||||
|
return indicator;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setIndicator(String indicator) {
|
||||||
|
this.indicator = indicator;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 07/11/22
|
||||||
|
*/
|
||||||
|
public class UsageCounts implements Serializable {
|
||||||
|
private String downloads;
|
||||||
|
private String views;
|
||||||
|
|
||||||
|
public String getDownloads() {
|
||||||
|
return downloads;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDownloads(String downloads) {
|
||||||
|
this.downloads = downloads;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getViews() {
|
||||||
|
return views;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setViews(String views) {
|
||||||
|
this.views = views;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.model.community;
|
package eu.dnetlib.dhp.oa.model.community;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.model.Instance;
|
import eu.dnetlib.dhp.oa.model.Instance;
|
||||||
|
@ -22,6 +23,7 @@ public class CommunityInstance extends Instance {
|
||||||
@JsonSchema(description = "Information about the source from which the record has been collected")
|
@JsonSchema(description = "Information about the source from which the record has been collected")
|
||||||
private CfHbKeyValue collectedfrom;
|
private CfHbKeyValue collectedfrom;
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public CfHbKeyValue getHostedby() {
|
public CfHbKeyValue getHostedby() {
|
||||||
return hostedby;
|
return hostedby;
|
||||||
}
|
}
|
||||||
|
@ -30,6 +32,7 @@ public class CommunityInstance extends Instance {
|
||||||
this.hostedby = hostedby;
|
this.hostedby = hostedby;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public CfHbKeyValue getCollectedfrom() {
|
public CfHbKeyValue getCollectedfrom() {
|
||||||
return collectedfrom;
|
return collectedfrom;
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.model.community;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.model.Result;
|
import eu.dnetlib.dhp.oa.model.Result;
|
||||||
|
@ -35,6 +36,7 @@ public class CommunityResult extends Result {
|
||||||
description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version")
|
description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version")
|
||||||
private List<CommunityInstance> instance;
|
private List<CommunityInstance> instance;
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<CommunityInstance> getInstance() {
|
public List<CommunityInstance> getInstance() {
|
||||||
return instance;
|
return instance;
|
||||||
}
|
}
|
||||||
|
@ -43,6 +45,7 @@ public class CommunityResult extends Result {
|
||||||
this.instance = instance;
|
this.instance = instance;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<CfHbKeyValue> getCollectedfrom() {
|
public List<CfHbKeyValue> getCollectedfrom() {
|
||||||
return collectedfrom;
|
return collectedfrom;
|
||||||
}
|
}
|
||||||
|
@ -51,6 +54,7 @@ public class CommunityResult extends Result {
|
||||||
this.collectedfrom = collectedfrom;
|
this.collectedfrom = collectedfrom;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<Project> getProjects() {
|
public List<Project> getProjects() {
|
||||||
return projects;
|
return projects;
|
||||||
}
|
}
|
||||||
|
@ -59,6 +63,7 @@ public class CommunityResult extends Result {
|
||||||
this.projects = projects;
|
this.projects = projects;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public List<Context> getContext() {
|
public List<Context> getContext() {
|
||||||
return context;
|
return context;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@ import java.util.List;
|
||||||
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.model.Container;
|
import eu.dnetlib.dhp.oa.model.Container;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Indicator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
|
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
|
||||||
|
@ -128,6 +129,17 @@ public class Datasource implements Serializable {
|
||||||
@JsonSchema(description = "Information about the journal, if this data source is of type Journal.")
|
@JsonSchema(description = "Information about the journal, if this data source is of type Journal.")
|
||||||
private Container journal; // issn etc del Journal
|
private Container journal; // issn etc del Journal
|
||||||
|
|
||||||
|
// @JsonSchema(description = "Indicators computed for this Datasource, for example UsageCount ones")
|
||||||
|
// private Indicator indicators;
|
||||||
|
//
|
||||||
|
// public Indicator getIndicators() {
|
||||||
|
// return indicators;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// public void setIndicators(Indicator indicators) {
|
||||||
|
// this.indicators = indicators;
|
||||||
|
// }
|
||||||
|
|
||||||
public String getId() {
|
public String getId() {
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,38 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.model.graph;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* To represent the generic node in a relation. It has the following parameters: - private String id the openaire id of
|
|
||||||
* the entity in the relation - private String type the type of the entity in the relation. Consider the generic
|
|
||||||
* relation between a Result R and a Project P, the node representing R will have as id the id of R and as type result,
|
|
||||||
* while the node representing the project will have as id the id of the project and as type project
|
|
||||||
*/
|
|
||||||
public class Node implements Serializable {
|
|
||||||
private String id;
|
|
||||||
private String type;
|
|
||||||
|
|
||||||
public String getId() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setId(String id) {
|
|
||||||
this.id = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getType() {
|
|
||||||
return type;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setType(String type) {
|
|
||||||
this.type = type;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Node newInstance(String id, String type) {
|
|
||||||
Node node = new Node();
|
|
||||||
node.id = id;
|
|
||||||
node.type = type;
|
|
||||||
return node;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -6,6 +6,8 @@ import java.util.List;
|
||||||
|
|
||||||
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.model.Indicator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump
|
* This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump
|
||||||
* of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and
|
* of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and
|
||||||
|
@ -68,6 +70,17 @@ public class Project implements Serializable {
|
||||||
@JsonSchema(description = "The h2020 programme funding the project")
|
@JsonSchema(description = "The h2020 programme funding the project")
|
||||||
private List<Programme> h2020programme;
|
private List<Programme> h2020programme;
|
||||||
|
|
||||||
|
// @JsonSchema(description = "Indicators computed for this project, for example UsageCount ones")
|
||||||
|
// private Indicator indicators;
|
||||||
|
//
|
||||||
|
// public Indicator getIndicators() {
|
||||||
|
// return indicators;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// public void setIndicators(Indicator indicators) {
|
||||||
|
// this.indicators = indicators;
|
||||||
|
// }
|
||||||
|
|
||||||
public String getId() {
|
public String getId() {
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,11 +15,17 @@ import eu.dnetlib.dhp.oa.model.Provenance;
|
||||||
* provenance of the relation
|
* provenance of the relation
|
||||||
*/
|
*/
|
||||||
public class Relation implements Serializable {
|
public class Relation implements Serializable {
|
||||||
@JsonSchema(description = "The node source in the relation")
|
@JsonSchema(description = "The identifier of the source in the relation")
|
||||||
private Node source;
|
private String source;
|
||||||
|
|
||||||
@JsonSchema(description = "The node target in the relation")
|
@JsonSchema(description = "The entity type of the source in the relation")
|
||||||
private Node target;
|
private String sourceType;
|
||||||
|
|
||||||
|
@JsonSchema(description = "The identifier of the target in the relation")
|
||||||
|
private String target;
|
||||||
|
|
||||||
|
@JsonSchema(description = "The entity type of the target in the relation")
|
||||||
|
private String targetType;
|
||||||
|
|
||||||
@JsonSchema(description = "To represent the semantics of a relation between two entities")
|
@JsonSchema(description = "To represent the semantics of a relation between two entities")
|
||||||
private RelType reltype;
|
private RelType reltype;
|
||||||
|
@ -34,22 +40,38 @@ public class Relation implements Serializable {
|
||||||
@JsonSchema(description = "The date when the relation was collected from OpenAIRE")
|
@JsonSchema(description = "The date when the relation was collected from OpenAIRE")
|
||||||
private String validationDate;
|
private String validationDate;
|
||||||
|
|
||||||
public Node getSource() {
|
public String getSource() {
|
||||||
return source;
|
return source;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSource(Node source) {
|
public void setSource(String source) {
|
||||||
this.source = source;
|
this.source = source;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Node getTarget() {
|
public String getSourceType() {
|
||||||
|
return sourceType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourceType(String sourceType) {
|
||||||
|
this.sourceType = sourceType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTarget() {
|
||||||
return target;
|
return target;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setTarget(Node target) {
|
public void setTarget(String target) {
|
||||||
this.target = target;
|
this.target = target;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getTargetType() {
|
||||||
|
return targetType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTargetType(String targetType) {
|
||||||
|
this.targetType = targetType;
|
||||||
|
}
|
||||||
|
|
||||||
public RelType getReltype() {
|
public RelType getReltype() {
|
||||||
return reltype;
|
return reltype;
|
||||||
}
|
}
|
||||||
|
@ -85,13 +107,16 @@ public class Relation implements Serializable {
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
|
|
||||||
return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName());
|
return Objects.hash(source, target, reltype.getType() + ":" + reltype.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) {
|
public static Relation newInstance(String source, String sourceType, String target, String targetType,
|
||||||
|
RelType reltype, Provenance provenance) {
|
||||||
Relation relation = new Relation();
|
Relation relation = new Relation();
|
||||||
relation.source = source;
|
relation.source = source;
|
||||||
|
relation.sourceType = sourceType;
|
||||||
relation.target = target;
|
relation.target = target;
|
||||||
|
relation.targetType = targetType;
|
||||||
relation.reltype = reltype;
|
relation.reltype = reltype;
|
||||||
relation.provenance = provenance;
|
relation.provenance = provenance;
|
||||||
return relation;
|
return relation;
|
||||||
|
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.model.graph;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -12,6 +13,8 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
|
||||||
public class ResearchCommunity extends ResearchInitiative {
|
public class ResearchCommunity extends ResearchInitiative {
|
||||||
@JsonSchema(
|
@JsonSchema(
|
||||||
description = "Only for research communities: the list of the subjects associated to the research community")
|
description = "Only for research communities: the list of the subjects associated to the research community")
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
private List<String> subject;
|
private List<String> subject;
|
||||||
|
|
||||||
public List<String> getSubject() {
|
public List<String> getSubject() {
|
||||||
|
|
|
@ -1,35 +1,38 @@
|
||||||
{
|
{
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
"$schema" : "http://json-schema.org/draft-07/schema#",
|
||||||
"type": "object",
|
"type" : "object",
|
||||||
"properties": {
|
"properties" : {
|
||||||
"acronym": {
|
"acronym" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The acronym of the community"
|
"description" : "The acronym of the community"
|
||||||
},
|
},
|
||||||
"description": {
|
"description" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "Description of the research community/research infrastructure"
|
"description" : "Description of the research community/research infrastructure"
|
||||||
},
|
},
|
||||||
"id": {
|
"id" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "OpenAIRE id of the research community/research infrastructure"
|
"description" : "The OpenAIRE id for the community/research infrastructure"
|
||||||
},
|
},
|
||||||
"name": {
|
"name" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The long name of the community"
|
"description" : "The long name of the community"
|
||||||
},
|
},
|
||||||
"subject": {
|
"subject" : {
|
||||||
"description": "Only for research communities: the list of the subjects associated to the research community",
|
"description" : "Only for research communities: the list of the subjects associated to the research community",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {"type": "string"}
|
"items" : {
|
||||||
},
|
"type" : "string",
|
||||||
"type": {
|
"description" : "Only for research communities: the list of the subjects associated to the research community"
|
||||||
"type": "string",
|
}
|
||||||
"description": "One of {Research Community, Research infrastructure}"
|
},
|
||||||
},
|
"type" : {
|
||||||
"zenodo_community": {
|
"type" : "string",
|
||||||
"type": "string",
|
"description" : "One of {Research Community, Research infrastructure}"
|
||||||
"description": "The URL of the Zenodo community associated to the Research community/Research infrastructure"
|
},
|
||||||
}
|
"zenodo_community" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The URL of the Zenodo community associated to the Research community/Research infrastructure"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,684 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"definitions": {
|
||||||
|
"CfHbKeyValue": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"key": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of key"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Provenance": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"provenance": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of provenance"
|
||||||
|
},
|
||||||
|
"trust": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of trust"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ResultPid": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"author": {
|
||||||
|
"description": "Description of author",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"fullname": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of fullname"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of name"
|
||||||
|
},
|
||||||
|
"pid": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of id"
|
||||||
|
},
|
||||||
|
"provenance": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "Description of provenance"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of pid"
|
||||||
|
},
|
||||||
|
"rank": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Description of rank"
|
||||||
|
},
|
||||||
|
"surname": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of surname"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of author"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bestaccessright": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of code"
|
||||||
|
},
|
||||||
|
"label": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of label"
|
||||||
|
},
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of bestaccessright"
|
||||||
|
},
|
||||||
|
"codeRepositoryUrl": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of codeRepositoryUrl"
|
||||||
|
},
|
||||||
|
"collectedfrom": {
|
||||||
|
"description": "Description of collectedfrom",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/CfHbKeyValue"},
|
||||||
|
{"description": "Description of collectedfrom"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contactgroup": {
|
||||||
|
"description": "Description of contactgroup",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of contactgroup"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contactperson": {
|
||||||
|
"description": "Description of contactperson",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of contactperson"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"container": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"conferencedate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of conferencedate"
|
||||||
|
},
|
||||||
|
"conferenceplace": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of conferenceplace"
|
||||||
|
},
|
||||||
|
"edition": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of edition"
|
||||||
|
},
|
||||||
|
"ep": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of ep"
|
||||||
|
},
|
||||||
|
"iss": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of iss"
|
||||||
|
},
|
||||||
|
"issnLinking": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of issnLinking"
|
||||||
|
},
|
||||||
|
"issnOnline": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of issnOnline"
|
||||||
|
},
|
||||||
|
"issnPrinted": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of issnPrinted"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of name"
|
||||||
|
},
|
||||||
|
"sp": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of sp"
|
||||||
|
},
|
||||||
|
"vol": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of vol"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of container"
|
||||||
|
},
|
||||||
|
"context": {
|
||||||
|
"description": "Description of context",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of code"
|
||||||
|
},
|
||||||
|
"label": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of label"
|
||||||
|
},
|
||||||
|
"provenance": {
|
||||||
|
"description": "Description of provenance",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "Description of provenance"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of context"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contributor": {
|
||||||
|
"description": "Description of contributor",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of contributor"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"country": {
|
||||||
|
"description": "Description of country",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of code"
|
||||||
|
},
|
||||||
|
"label": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of label"
|
||||||
|
},
|
||||||
|
"provenance": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "Description of provenance"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of country"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"coverage": {
|
||||||
|
"description": "Description of coverage",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of coverage"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"dateofcollection": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of dateofcollection"
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"description": "Description of description",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of description"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"documentationUrl": {
|
||||||
|
"description": "Description of documentationUrl",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of documentationUrl"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"embargoenddate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of embargoenddate"
|
||||||
|
},
|
||||||
|
"format": {
|
||||||
|
"description": "Description of format",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of format"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"geolocation": {
|
||||||
|
"description": "Description of geolocation",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"box": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of box"
|
||||||
|
},
|
||||||
|
"place": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of place"
|
||||||
|
},
|
||||||
|
"point": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of point"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of geolocation"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of id"
|
||||||
|
},
|
||||||
|
"indicators": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"bipIndicators": {
|
||||||
|
"description": "Description of bipIndicators",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"clazz": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of clazz"
|
||||||
|
},
|
||||||
|
"indicator": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of indicator"
|
||||||
|
},
|
||||||
|
"score": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of score"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of bipIndicators"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"usageCounts": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"downloads": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of downloads"
|
||||||
|
},
|
||||||
|
"views": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of views"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of usageCounts"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of indicators"
|
||||||
|
},
|
||||||
|
"instance": {
|
||||||
|
"description": "Description of instance",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"accessright": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of code"
|
||||||
|
},
|
||||||
|
"label": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of label"
|
||||||
|
},
|
||||||
|
"openAccessRoute": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"gold",
|
||||||
|
"green",
|
||||||
|
"hybrid",
|
||||||
|
"bronze"
|
||||||
|
],
|
||||||
|
"description": "Description of openAccessRoute"
|
||||||
|
},
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of accessright"
|
||||||
|
},
|
||||||
|
"alternateIdentifier": {
|
||||||
|
"description": "Description of alternateIdentifier",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of alternateIdentifier"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"articleprocessingcharge": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"amount": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of amount"
|
||||||
|
},
|
||||||
|
"currency": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of currency"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of articleprocessingcharge"
|
||||||
|
},
|
||||||
|
"collectedfrom": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/CfHbKeyValue"},
|
||||||
|
{"description": "Description of collectedfrom"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"hostedby": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/CfHbKeyValue"},
|
||||||
|
{"description": "Description of hostedby"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"license": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of license"
|
||||||
|
},
|
||||||
|
"pid": {
|
||||||
|
"description": "Description of pid",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/ResultPid"},
|
||||||
|
{"description": "Description of pid"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"publicationdate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of publicationdate"
|
||||||
|
},
|
||||||
|
"refereed": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of refereed"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of type"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"description": "Description of url",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of url"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of instance"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"isGreen": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Description of isGreen"
|
||||||
|
},
|
||||||
|
"isInDiamondJournal": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Description of isInDiamondJournal"
|
||||||
|
},
|
||||||
|
"language": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of code"
|
||||||
|
},
|
||||||
|
"label": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of label"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of language"
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Description of lastupdatetimestamp"
|
||||||
|
},
|
||||||
|
"maintitle": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of maintitle"
|
||||||
|
},
|
||||||
|
"openAccessColor": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"gold",
|
||||||
|
"hybrid",
|
||||||
|
"bronze"
|
||||||
|
],
|
||||||
|
"description": "Description of openAccessColor"
|
||||||
|
},
|
||||||
|
"originalId": {
|
||||||
|
"description": "Description of originalId",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of originalId"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pid": {
|
||||||
|
"description": "Description of pid",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/ResultPid"},
|
||||||
|
{"description": "Description of pid"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"programmingLanguage": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of programmingLanguage"
|
||||||
|
},
|
||||||
|
"projects": {
|
||||||
|
"description": "Description of projects",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"acronym": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of acronym"
|
||||||
|
},
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of code"
|
||||||
|
},
|
||||||
|
"funder": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"fundingStream": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of fundingStream"
|
||||||
|
},
|
||||||
|
"jurisdiction": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of jurisdiction"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of name"
|
||||||
|
},
|
||||||
|
"shortName": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of shortName"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of funder"
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of id"
|
||||||
|
},
|
||||||
|
"provenance": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "Description of provenance"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of title"
|
||||||
|
},
|
||||||
|
"validated": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"validatedByFunder": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Description of validatedByFunder"
|
||||||
|
},
|
||||||
|
"validationDate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of validationDate"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of validated"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of projects"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"publicationdate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of publicationdate"
|
||||||
|
},
|
||||||
|
"publiclyFunded": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Description of publiclyFunded"
|
||||||
|
},
|
||||||
|
"publisher": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of publisher"
|
||||||
|
},
|
||||||
|
"size": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of size"
|
||||||
|
},
|
||||||
|
"source": {
|
||||||
|
"description": "Description of source",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of source"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"subjects": {
|
||||||
|
"description": "Description of subjects",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"provenance": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "Description of provenance"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"subject": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of subject"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of subjects"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"subtitle": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of subtitle"
|
||||||
|
},
|
||||||
|
"tool": {
|
||||||
|
"description": "Description of tool",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of tool"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of type"
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of version"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,192 +1,196 @@
|
||||||
{
|
{
|
||||||
"$schema":"http://json-schema.org/draft-07/schema#",
|
"$schema" : "http://json-schema.org/draft-07/schema#",
|
||||||
"definitions": {
|
"type" : "object",
|
||||||
"ControlledField": {
|
"properties" : {
|
||||||
"type": "object",
|
"accessrights" : {
|
||||||
"properties": {
|
"type" : "string",
|
||||||
"scheme": {
|
"description" : "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}"
|
||||||
"type": "string"
|
},
|
||||||
|
"certificates" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The certificate, seal or standard the data source complies with. As defined by re3data.org."
|
||||||
|
},
|
||||||
|
"citationguidelineurl" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The URL of the data source providing information on how to cite its items. As defined by re3data.org."
|
||||||
|
},
|
||||||
|
"contenttypes" : {
|
||||||
|
"description" : "Types of content in the data source, as defined by OpenDOAR",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Types of content in the data source, as defined by OpenDOAR"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"databaseaccessrestriction" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}"
|
||||||
|
},
|
||||||
|
"datasourcetype" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"scheme" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The scheme used to express the value (i.e. pubsrepository::journal)"
|
||||||
},
|
},
|
||||||
"value": {
|
"value" : {
|
||||||
"type": "string"
|
"type" : "string",
|
||||||
|
"description" : "The value expressed in the scheme (Journal)"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
|
"description" : "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
|
||||||
}
|
|
||||||
},
|
|
||||||
"type":"object",
|
|
||||||
"properties": {
|
|
||||||
"accessrights": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}"
|
|
||||||
},
|
},
|
||||||
"certificates": {
|
"datauploadrestriction" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The certificate, seal or standard the data source complies with. As defined by re3data.org."
|
"description" : "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}"
|
||||||
},
|
},
|
||||||
"citationguidelineurl": {
|
"dateofvalidation" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org."
|
"description" : "The date of last validation against the OpenAIRE guidelines for the datasource records"
|
||||||
},
|
},
|
||||||
"contenttypes": {
|
"description" : {
|
||||||
"description": "Types of content in the data source, as defined by OpenDOAR",
|
"type" : "string"
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"databaseaccessrestriction": {
|
"englishname" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}"
|
"description" : "The English name of the datasource"
|
||||||
},
|
},
|
||||||
"datasourcetype": {
|
"id" : {
|
||||||
"allOf": [
|
"type" : "string",
|
||||||
{
|
"description" : "The OpenAIRE id of the data source"
|
||||||
"$ref": "#/definitions/ControlledField"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"datauploadrestriction": {
|
"journal" : {
|
||||||
"type": "string",
|
"type" : "object",
|
||||||
"description": "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}"
|
"properties" : {
|
||||||
},
|
"conferencedate" : {
|
||||||
"dateofvalidation": {
|
"type" : "string"
|
||||||
"type": "string",
|
|
||||||
"description": "The date of last validation against the OpenAIRE guidelines for the datasource records"
|
|
||||||
},
|
|
||||||
"description": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"englishname": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The English name of the datasource"
|
|
||||||
},
|
|
||||||
"id": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The OpenAIRE id of the data source"
|
|
||||||
},
|
|
||||||
"journal": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"conferencedate": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
},
|
||||||
"conferenceplace": {
|
"conferenceplace" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"edition": {
|
"edition" : {
|
||||||
"type": "string"
|
"type" : "string",
|
||||||
|
"description" : "Edition of the journal or conference proceeding"
|
||||||
},
|
},
|
||||||
"ep": {
|
"ep" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "End page"
|
"description" : "End page"
|
||||||
},
|
},
|
||||||
"iss": {
|
"iss" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "Issue number"
|
"description" : "Journal issue number"
|
||||||
},
|
},
|
||||||
"issnLinking": {
|
"issnLinking" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"issnOnline": {
|
"issnOnline" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"issnPrinted": {
|
"issnPrinted" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"name": {
|
"name" : {
|
||||||
"type": "string"
|
"type" : "string",
|
||||||
|
"description" : "Name of the journal or conference"
|
||||||
},
|
},
|
||||||
"sp": {
|
"sp" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "Start page"
|
"description" : "Start page"
|
||||||
},
|
},
|
||||||
"vol": {
|
"vol" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "Volume"
|
"description" : "Volume"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"description": "Information about the journal, if this data source is of type Journal."
|
"description" : "Information about the journal, if this data source is of type Journal."
|
||||||
},
|
},
|
||||||
"languages": {
|
"languages" : {
|
||||||
"description": "The languages present in the data source's content, as defined by OpenDOAR.",
|
"description" : "The languages present in the data source's content, as defined by OpenDOAR.",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"type": "string"
|
"type" : "string",
|
||||||
|
"description" : "The languages present in the data source's content, as defined by OpenDOAR."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"logourl": {
|
"logourl" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"missionstatementurl": {
|
"missionstatementurl" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
|
"description" : "The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
|
||||||
},
|
},
|
||||||
"officialname": {
|
"officialname" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The official name of the datasource"
|
"description" : "The official name of the datasource"
|
||||||
},
|
},
|
||||||
"openairecompatibility": {
|
"openairecompatibility" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu."
|
"description" : "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu."
|
||||||
},
|
},
|
||||||
"originalId": {
|
"originalId" : {
|
||||||
"description": "Original identifiers for the datasource"
|
"description" : "Original identifiers for the datasource",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"type": "string"
|
"type" : "string",
|
||||||
|
"description" : "Original identifiers for the datasource"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"pid": {
|
"pid" : {
|
||||||
"description": "Persistent identifiers of the datasource",
|
"description" : "Persistent identifiers of the datasource",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"allOf": [
|
"type" : "object",
|
||||||
{
|
"properties" : {
|
||||||
"$ref": "#/definitions/ControlledField"
|
"scheme" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The scheme used to express the value "
|
||||||
|
},
|
||||||
|
"value" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The value expressed in the scheme "
|
||||||
}
|
}
|
||||||
]
|
},
|
||||||
|
"description" : "Persistent identifiers of the datasource"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"pidsystems": {
|
"pidsystems" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The persistent identifier system that is used by the data source. As defined by re3data.org"
|
"description" : "The persistent identifier system that is used by the data source. As defined by re3data.org"
|
||||||
},
|
},
|
||||||
"policies": {
|
"policies" : {
|
||||||
"description": "Policies of the data source, as defined in OpenDOAR.",
|
"description" : "Policies of the data source, as defined in OpenDOAR.",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"type": "string"
|
"type" : "string",
|
||||||
|
"description" : "Policies of the data source, as defined in OpenDOAR."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"releaseenddate": {
|
"releaseenddate" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org"
|
"description" : "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org"
|
||||||
},
|
},
|
||||||
"releasestartdate": {
|
"releasestartdate" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "Releasing date of the data source, as defined by re3data.org"
|
"description" : "Releasing date of the data source, as defined by re3data.org"
|
||||||
},
|
},
|
||||||
"subjects": {
|
"subjects" : {
|
||||||
"description": "List of subjects associated to the datasource",
|
"description" : "List of subjects associated to the datasource",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"type": "string"
|
"type" : "string",
|
||||||
|
"description" : "List of subjects associated to the datasource"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"uploadrights": {
|
"uploadrights" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}"
|
"description" : "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}"
|
||||||
},
|
},
|
||||||
"versioning": {
|
"versioning" : {
|
||||||
"type": "boolean",
|
"type" : "boolean",
|
||||||
"description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise."
|
"description" : "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise."
|
||||||
},
|
},
|
||||||
"websiteurl": {
|
"websiteurl" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,621 @@
|
||||||
|
{
|
||||||
|
"$schema" : "http://json-schema.org/draft-07/schema#",
|
||||||
|
"definitions" : {
|
||||||
|
"CfHbKeyValue" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"key" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "the OpenAIRE identifier of the data source"
|
||||||
|
},
|
||||||
|
"value" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "the name of the data source"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Provenance" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"provenance" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"trust" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ResultPid" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"scheme" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories"
|
||||||
|
},
|
||||||
|
"value" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The value expressed in the scheme (i.e. 10.1000/182)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"author" : {
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"fullname" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"name" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"pid" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"id" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"scheme" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The author's pid scheme. OpenAIRE currently supports 'ORCID'"
|
||||||
|
},
|
||||||
|
"value" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"provenance" : {
|
||||||
|
"allOf" : [ {
|
||||||
|
"$ref" : "#/definitions/Provenance"
|
||||||
|
}, {
|
||||||
|
"description" : "The reason why the pid was associated to the author"
|
||||||
|
} ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "The author's persistent identifiers"
|
||||||
|
},
|
||||||
|
"rank" : {
|
||||||
|
"type" : "integer"
|
||||||
|
},
|
||||||
|
"surname" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bestaccessright" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"code" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||||
|
},
|
||||||
|
"label" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Label for the access mode"
|
||||||
|
},
|
||||||
|
"scheme" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "The openest of the access rights of this result."
|
||||||
|
},
|
||||||
|
"codeRepositoryUrl" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Only for results with type 'software': the URL to the repository with the source code"
|
||||||
|
},
|
||||||
|
"collectedfrom" : {
|
||||||
|
"description" : "Information about the sources from which the record has been collected",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"allOf" : [ {
|
||||||
|
"$ref" : "#/definitions/CfHbKeyValue"
|
||||||
|
}, {
|
||||||
|
"description" : "Information about the sources from which the record has been collected"
|
||||||
|
} ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contactgroup" : {
|
||||||
|
"description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contactperson" : {
|
||||||
|
"description" : "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"container" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"conferencedate" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"conferenceplace" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"edition" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Edition of the journal or conference proceeding"
|
||||||
|
},
|
||||||
|
"ep" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "End page"
|
||||||
|
},
|
||||||
|
"iss" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Journal issue number"
|
||||||
|
},
|
||||||
|
"issnLinking" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"issnOnline" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"issnPrinted" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"name" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Name of the journal or conference"
|
||||||
|
},
|
||||||
|
"sp" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Start page"
|
||||||
|
},
|
||||||
|
"vol" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Volume"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "Container has information about the conference or journal where the result has been presented or published"
|
||||||
|
},
|
||||||
|
"context" : {
|
||||||
|
"description" : "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"code" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Code identifying the RI/RC"
|
||||||
|
},
|
||||||
|
"label" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Label of the RI/RC"
|
||||||
|
},
|
||||||
|
"provenance" : {
|
||||||
|
"description" : "Why this result is associated to the RI/RC.",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"allOf" : [ {
|
||||||
|
"$ref" : "#/definitions/Provenance"
|
||||||
|
}, {
|
||||||
|
"description" : "Why this result is associated to the RI/RC."
|
||||||
|
} ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contributor" : {
|
||||||
|
"description" : "Contributors for the result",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Contributors for the result"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"country" : {
|
||||||
|
"description" : "The list of countries associated to this result",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"code" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "ISO 3166-1 alpha-2 country code (i.e. IT)"
|
||||||
|
},
|
||||||
|
"label" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The label for that code (i.e. Italy)"
|
||||||
|
},
|
||||||
|
"provenance" : {
|
||||||
|
"allOf" : [ {
|
||||||
|
"$ref" : "#/definitions/Provenance"
|
||||||
|
}, {
|
||||||
|
"description" : "Why this result is associated to the country."
|
||||||
|
} ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "The list of countries associated to this result"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"coverage" : {
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"dateofcollection" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "When OpenAIRE collected the record the last time"
|
||||||
|
},
|
||||||
|
"description" : {
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"documentationUrl" : {
|
||||||
|
"description" : "Only for results with type 'software': URL to the software documentation",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Only for results with type 'software': URL to the software documentation"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"embargoenddate" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Date when the embargo ends and this result turns Open Access"
|
||||||
|
},
|
||||||
|
"format" : {
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"geolocation" : {
|
||||||
|
"description" : "Geolocation information",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"box" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"place" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"point" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "Geolocation information"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The OpenAIRE identifiers for this result"
|
||||||
|
},
|
||||||
|
"indicators" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"bipIndicators" : {
|
||||||
|
"description" : "The impact measures (i.e. popularity)",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"clazz" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"indicator" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"score" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "The impact measures (i.e. popularity)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"usageCounts" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"downloads" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"views" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "The usage counts (i.e. downloads)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "Indicators computed for this result, for example UsageCount ones"
|
||||||
|
},
|
||||||
|
"instance" : {
|
||||||
|
"description" : "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"accessright" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"code" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||||
|
},
|
||||||
|
"label" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Label for the access mode"
|
||||||
|
},
|
||||||
|
"openAccessRoute" : {
|
||||||
|
"type" : "string",
|
||||||
|
"enum" : [ "gold", "green", "hybrid", "bronze" ]
|
||||||
|
},
|
||||||
|
"scheme" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "The accessRights for this materialization of the result"
|
||||||
|
},
|
||||||
|
"alternateIdentifier" : {
|
||||||
|
"description" : "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"scheme" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi"
|
||||||
|
},
|
||||||
|
"value" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The value expressed in the scheme"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"articleprocessingcharge" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"amount" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"currency" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
|
||||||
|
},
|
||||||
|
"collectedfrom" : {
|
||||||
|
"allOf" : [ {
|
||||||
|
"$ref" : "#/definitions/CfHbKeyValue"
|
||||||
|
}, {
|
||||||
|
"description" : "Information about the source from which the record has been collected"
|
||||||
|
} ]
|
||||||
|
},
|
||||||
|
"hostedby" : {
|
||||||
|
"allOf" : [ {
|
||||||
|
"$ref" : "#/definitions/CfHbKeyValue"
|
||||||
|
}, {
|
||||||
|
"description" : "Information about the source from which the instance can be viewed or downloaded."
|
||||||
|
} ]
|
||||||
|
},
|
||||||
|
"license" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"pid" : {
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"$ref" : "#/definitions/ResultPid"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"publicationdate" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Date of the research product"
|
||||||
|
},
|
||||||
|
"refereed" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)"
|
||||||
|
},
|
||||||
|
"type" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
|
||||||
|
},
|
||||||
|
"url" : {
|
||||||
|
"description" : "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. "
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"language" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"code" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "alpha-3/ISO 639-2 code of the language"
|
||||||
|
},
|
||||||
|
"label" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Language label in English"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp" : {
|
||||||
|
"type" : "integer",
|
||||||
|
"description" : "Timestamp of last update of the record in OpenAIRE"
|
||||||
|
},
|
||||||
|
"maintitle" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
|
||||||
|
},
|
||||||
|
"originalId" : {
|
||||||
|
"description" : "Identifiers of the record at the original sources",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Identifiers of the record at the original sources"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pid" : {
|
||||||
|
"description" : "Persistent identifiers of the result",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"allOf" : [ {
|
||||||
|
"$ref" : "#/definitions/ResultPid"
|
||||||
|
}, {
|
||||||
|
"description" : "Persistent identifiers of the result"
|
||||||
|
} ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"programmingLanguage" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Only for results with type 'software': the programming language"
|
||||||
|
},
|
||||||
|
"projects" : {
|
||||||
|
"description" : "List of projects (i.e. grants) that (co-)funded the production ofn the research results",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"acronym" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The acronym of the project"
|
||||||
|
},
|
||||||
|
"code" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The grant agreement number"
|
||||||
|
},
|
||||||
|
"funder" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"fundingStream" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Stream of funding (e.g. for European Commission can be H2020 or FP7)"
|
||||||
|
},
|
||||||
|
"jurisdiction" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)"
|
||||||
|
},
|
||||||
|
"name" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The name of the funder (European Commission)"
|
||||||
|
},
|
||||||
|
"shortName" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The short name of the funder (EC)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "Information about the funder funding the project"
|
||||||
|
},
|
||||||
|
"id" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The OpenAIRE id for the project"
|
||||||
|
},
|
||||||
|
"provenance" : {
|
||||||
|
"$ref" : "#/definitions/Provenance"
|
||||||
|
},
|
||||||
|
"title" : {
|
||||||
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"validated" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"validatedByFunder" : {
|
||||||
|
"type" : "boolean"
|
||||||
|
},
|
||||||
|
"validationDate" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "List of projects (i.e. grants) that (co-)funded the production ofn the research results"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"publicationdate" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date."
|
||||||
|
},
|
||||||
|
"publisher" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource."
|
||||||
|
},
|
||||||
|
"size" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Only for results with type 'dataset': the declared size of the dataset"
|
||||||
|
},
|
||||||
|
"source" : {
|
||||||
|
"description" : "See definition of Dublin Core field dc:source",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "See definition of Dublin Core field dc:source"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"subjects" : {
|
||||||
|
"description" : "Keywords associated to the result",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"provenance" : {
|
||||||
|
"allOf" : [ {
|
||||||
|
"$ref" : "#/definitions/Provenance"
|
||||||
|
}, {
|
||||||
|
"description" : "Why this subject is associated to the result"
|
||||||
|
} ]
|
||||||
|
},
|
||||||
|
"subject" : {
|
||||||
|
"type" : "object",
|
||||||
|
"properties" : {
|
||||||
|
"scheme" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)."
|
||||||
|
},
|
||||||
|
"value" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "Keywords associated to the result"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"subtitle" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Explanatory or alternative name by which a scientific result is known."
|
||||||
|
},
|
||||||
|
"tool" : {
|
||||||
|
"description" : "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product",
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)"
|
||||||
|
},
|
||||||
|
"version" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Version of the result"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,57 +1,59 @@
|
||||||
{
|
{
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
"$schema" : "http://json-schema.org/draft-07/schema#",
|
||||||
"type": "object",
|
"type" : "object",
|
||||||
"properties": {
|
"properties" : {
|
||||||
"alternativenames": {
|
"alternativenames" : {
|
||||||
"description": "Alternative names that identify the organisation",
|
"description" : "Alternative names that identify the organisation",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"type": "string"
|
"type" : "string",
|
||||||
|
"description" : "Alternative names that identify the organisation"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"country": {
|
"country" : {
|
||||||
"type": "object",
|
"type" : "object",
|
||||||
"properties": {
|
"properties" : {
|
||||||
"code": {
|
"code" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The organisation country code"
|
"description" : "ISO 3166-1 alpha-2 country code (i.e. IT)"
|
||||||
},
|
},
|
||||||
"label": {
|
"label" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The organisation country label"
|
"description" : "The label for that code (i.e. Italy)"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"description": "The country of the organisation"
|
"description" : "The organisation country"
|
||||||
},
|
},
|
||||||
"id": {
|
"id" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The OpenAIRE id for the organisation"
|
"description" : "The OpenAIRE id for the organisation"
|
||||||
},
|
},
|
||||||
"legalname": {
|
"legalname" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"legalshortname": {
|
"legalshortname" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"pid": {
|
"pid" : {
|
||||||
"description": "Persistent identifiers for the organisation i.e. isni 0000000090326370",
|
"description" : "Persistent identifiers for the organisation i.e. isni 0000000090326370",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"type": "object",
|
"type" : "object",
|
||||||
"properties": {
|
"properties" : {
|
||||||
"scheme": {
|
"scheme" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The scheme of the identifier (i.e. isni)"
|
"description" : "The scheme of the identifier (i.e. isni)"
|
||||||
},
|
},
|
||||||
"value": {
|
"value" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "the value in the schema (i.e. 0000000090326370)"
|
"description" : "The value in the schema (i.e. 0000000090326370)"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description" : "Persistent identifiers for the organisation i.e. isni 0000000090326370"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"websiteurl": {
|
"websiteurl" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,119 +1,119 @@
|
||||||
{
|
{
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
"$schema" : "http://json-schema.org/draft-07/schema#",
|
||||||
"type": "object",
|
"type" : "object",
|
||||||
"properties": {
|
"properties" : {
|
||||||
"acronym": {
|
"acronym" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"callidentifier": {
|
"callidentifier" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"code": {
|
"code" : {
|
||||||
"type": "string",
|
"type" : "string"
|
||||||
"description": "The grant agreement number"
|
|
||||||
},
|
},
|
||||||
"enddate": {
|
"enddate" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"funding": {
|
"funding" : {
|
||||||
"description": "Funding information for the project",
|
"description" : "Funding information for the project",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"type": "object",
|
"type" : "object",
|
||||||
"properties": {
|
"properties" : {
|
||||||
"funding_stream": {
|
"funding_stream" : {
|
||||||
"type": "object",
|
"type" : "object",
|
||||||
"properties": {
|
"properties" : {
|
||||||
"description": {
|
"description" : {
|
||||||
"type": "string",
|
"type" : "string"
|
||||||
"description": "Description of the funding stream"
|
|
||||||
},
|
},
|
||||||
"id": {
|
"id" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "Id of the funding stream"
|
"description" : "Id of the funding stream"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description" : "Description of the funding stream"
|
||||||
},
|
},
|
||||||
"jurisdiction": {
|
"jurisdiction" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The jurisdiction of the funder (i.e. EU)"
|
"description" : "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)"
|
||||||
},
|
},
|
||||||
"name": {
|
"name" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The name of the funder (European Commission)"
|
"description" : "The name of the funder (European Commission)"
|
||||||
},
|
},
|
||||||
"shortName": {
|
"shortName" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The short name of the funder (EC)"
|
"description" : "The short name of the funder (EC)"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description" : "Funding information for the project"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"granted": {
|
"granted" : {
|
||||||
"type": "object",
|
"type" : "object",
|
||||||
"properties": {
|
"properties" : {
|
||||||
"currency": {
|
"currency" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The currency of the granted amount (e.g. EUR)"
|
"description" : "The currency of the granted amount (e.g. EUR)"
|
||||||
},
|
},
|
||||||
"fundedamount": {
|
"fundedamount" : {
|
||||||
"type": "number",
|
"type" : "number",
|
||||||
"description": "The funded amount"
|
"description" : "The funded amount"
|
||||||
},
|
},
|
||||||
"totalcost": {
|
"totalcost" : {
|
||||||
"type": "number",
|
"type" : "number",
|
||||||
"description": "The total cost of the project"
|
"description" : "The total cost of the project"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"description": "The money granted to the project"
|
"description" : "The money granted to the project"
|
||||||
},
|
},
|
||||||
"h2020programme": {
|
"h2020programme" : {
|
||||||
"description": "The h2020 programme funding the project",
|
"description" : "The h2020 programme funding the project",
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"type": "object",
|
"type" : "object",
|
||||||
"properties": {
|
"properties" : {
|
||||||
"code": {
|
"code" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The code of the programme"
|
"description" : "The code of the programme"
|
||||||
},
|
},
|
||||||
"description": {
|
"description" : {
|
||||||
"type": "string",
|
"type" : "string",
|
||||||
"description": "The description of the programme"
|
"description" : "The description of the programme"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description" : "The h2020 programme funding the project"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"id": {
|
"id" : {
|
||||||
"type": "string",
|
"type" : "string"
|
||||||
"description": "OpenAIRE id for the project"
|
|
||||||
},
|
},
|
||||||
"keywords": {
|
"keywords" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"openaccessmandatefordataset": {
|
"openaccessmandatefordataset" : {
|
||||||
"type": "boolean"
|
"type" : "boolean"
|
||||||
},
|
},
|
||||||
"openaccessmandateforpublications": {
|
"openaccessmandateforpublications" : {
|
||||||
"type": "boolean"
|
"type" : "boolean"
|
||||||
},
|
},
|
||||||
"startdate": {
|
"startdate" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"subject": {
|
"subject" : {
|
||||||
"type": "array",
|
"type" : "array",
|
||||||
"items": {
|
"items" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"summary": {
|
"summary" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"title": {
|
"title" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"websiteurl": {
|
"websiteurl" : {
|
||||||
"type": "string"
|
"type" : "string"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,68 +1,54 @@
|
||||||
{
|
{
|
||||||
"$schema":"http://json-schema.org/draft-07/schema#",
|
"$schema" : "http://json-schema.org/draft-07/schema#",
|
||||||
"definitions": {
|
"type" : "object",
|
||||||
"Node": {
|
"properties" : {
|
||||||
"type": "object",
|
"provenance" : {
|
||||||
"properties": {
|
"type" : "object",
|
||||||
"id": {
|
"properties" : {
|
||||||
"type": "string",
|
"provenance" : {
|
||||||
"description": "The OpenAIRE id of the entity"
|
"type" : "string"
|
||||||
},
|
},
|
||||||
"type": {
|
"trust" : {
|
||||||
"type": "string",
|
"type" : "string"
|
||||||
"description": "The type of the entity (i.e. organisation)"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"type":"object",
|
|
||||||
"properties": {
|
|
||||||
"provenance": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"provenance": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The reason why OpenAIRE holds the relation "
|
|
||||||
},
|
|
||||||
"trust": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"reltype": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"name": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The semantics of the relation (i.e. isAuthorInstitutionOf). "
|
|
||||||
},
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "the type of the relation (i.e. affiliation)"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"description": "To represent the semantics of a relation between two entities"
|
"description" : "The reason why OpenAIRE holds the relation "
|
||||||
},
|
},
|
||||||
"source": {
|
"reltype" : {
|
||||||
"allOf": [
|
"type" : "object",
|
||||||
{"$ref": "#/definitions/Node"},
|
"properties" : {
|
||||||
{"description": "The node source in the relation"}
|
"name" : {
|
||||||
]
|
"type" : "string"
|
||||||
|
},
|
||||||
|
"type" : {
|
||||||
|
"type" : "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description" : "To represent the semantics of a relation between two entities"
|
||||||
},
|
},
|
||||||
"target": {
|
"source" : {
|
||||||
"allOf": [
|
"type" : "string",
|
||||||
{"$ref": "#/definitions/Node"},
|
"description" : "The identifier of the source in the relation"
|
||||||
{"description": "The node target in the relation"}
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"validated":{
|
"sourceType" : {
|
||||||
"type":"boolean",
|
"type" : "string",
|
||||||
"description":"True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)"
|
"description" : "The entity type of the source in the relation"
|
||||||
},
|
},
|
||||||
"validationDate":{
|
"target" : {
|
||||||
"type":"string",
|
"type" : "string",
|
||||||
"description":"The date when the relation was collected from OpenAIRE"
|
"description" : "The identifier of the target in the relation"
|
||||||
|
},
|
||||||
|
"targetType" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The entity type of the target in the relation"
|
||||||
|
},
|
||||||
|
"validated" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)"
|
||||||
|
},
|
||||||
|
"validationDate" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The date when the relation was collected from OpenAIRE"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,415 +1,506 @@
|
||||||
{
|
{
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
"definitions": {
|
"definitions": {
|
||||||
"ControlledField": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"scheme": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"value": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
|
|
||||||
},
|
|
||||||
"Provenance": {
|
"Provenance": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"provenance": {
|
"provenance": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The process that produced/provided the information"
|
"description": "Description of provenance"
|
||||||
},
|
},
|
||||||
"trust": {
|
"trust": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of trust"
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
"description": "Indicates the process that produced (or provided) the information, and the trust associated to the information"
|
},
|
||||||
|
"ResultPid": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"author": {
|
"author": {
|
||||||
|
"description": "Description of author",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"fullname": {
|
"fullname": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of fullname"
|
||||||
},
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of name"
|
||||||
},
|
},
|
||||||
"pid": {
|
"pid": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"id": {
|
"id": {
|
||||||
"allOf": [
|
"type": "object",
|
||||||
{"$ref": "#/definitions/ControlledField"},
|
"properties": {
|
||||||
{"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"}
|
"scheme": {
|
||||||
]
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of id"
|
||||||
},
|
},
|
||||||
"provenance": {
|
"provenance": {
|
||||||
"allOf": [
|
"allOf": [
|
||||||
{"$ref": "#/definitions/Provenance"},
|
{"$ref": "#/definitions/Provenance"},
|
||||||
{"description": "Provenance of author's pid"}
|
{"description": "Description of provenance"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "Description of pid"
|
||||||
},
|
},
|
||||||
"rank": {
|
"rank": {
|
||||||
"type": "integer"
|
"type": "integer",
|
||||||
|
"description": "Description of rank"
|
||||||
},
|
},
|
||||||
"surname": {
|
"surname": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of surname"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "Description of author"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"bestaccessright":{
|
"bestaccessright": {
|
||||||
"type":"object",
|
"type": "object",
|
||||||
"properties":{
|
"properties": {
|
||||||
"code": {
|
"code": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
"description": "Description of code"
|
||||||
},
|
},
|
||||||
"label": {
|
"label": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Label for the access mode"
|
"description": "Description of label"
|
||||||
},
|
},
|
||||||
"scheme": {
|
"scheme": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
"description": "Description of scheme"
|
||||||
}
|
}
|
||||||
}
|
|
||||||
},
|
},
|
||||||
|
"description": "Description of bestaccessright"
|
||||||
|
},
|
||||||
"codeRepositoryUrl": {
|
"codeRepositoryUrl": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Only for results with type 'software': the URL to the repository with the source code"
|
"description": "Description of codeRepositoryUrl"
|
||||||
},
|
},
|
||||||
"contactgroup": {
|
"contactgroup": {
|
||||||
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
|
"description": "Description of contactgroup",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of contactgroup"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"contactperson": {
|
"contactperson": {
|
||||||
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
|
"description": "Description of contactperson",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of contactperson"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"container": {
|
"container": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"conferencedate": {
|
"conferencedate": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of conferencedate"
|
||||||
},
|
},
|
||||||
"conferenceplace": {
|
"conferenceplace": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of conferenceplace"
|
||||||
},
|
},
|
||||||
"edition": {
|
"edition": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Edition of the journal or conference proceeding"
|
"description": "Description of edition"
|
||||||
},
|
},
|
||||||
"ep": {
|
"ep": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "End page"
|
"description": "Description of ep"
|
||||||
},
|
},
|
||||||
"iss": {
|
"iss": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Journal issue"
|
"description": "Description of iss"
|
||||||
},
|
},
|
||||||
"issnLinking": {
|
"issnLinking": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of issnLinking"
|
||||||
},
|
},
|
||||||
"issnOnline": {
|
"issnOnline": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of issnOnline"
|
||||||
},
|
},
|
||||||
"issnPrinted": {
|
"issnPrinted": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of issnPrinted"
|
||||||
},
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Name of the journal or conference"
|
"description": "Description of name"
|
||||||
},
|
},
|
||||||
"sp": {
|
"sp": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "start page"
|
"description": "Description of sp"
|
||||||
},
|
},
|
||||||
"vol": {
|
"vol": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of vol"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"description": "Container has information about the conference or journal where the result has been presented or published"
|
"description": "Description of container"
|
||||||
},
|
},
|
||||||
"contributor": {
|
"contributor": {
|
||||||
|
"description": "Description of contributor",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Contributors for the result"
|
"description": "Description of contributor"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"country": {
|
"country": {
|
||||||
|
"description": "Description of country",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"code": {
|
"code": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "ISO 3166-1 alpha-2 country code"
|
"description": "Description of code"
|
||||||
},
|
},
|
||||||
"label": {
|
"label": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of label"
|
||||||
},
|
},
|
||||||
"provenance": {
|
"provenance": {
|
||||||
"allOf": [
|
"allOf": [
|
||||||
{"$ref": "#/definitions/Provenance"},
|
{"$ref": "#/definitions/Provenance"},
|
||||||
{"description": "Why this result is associated to the country."}
|
{"description": "Description of provenance"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "Description of country"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"coverage": {
|
"coverage": {
|
||||||
|
"description": "Description of coverage",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of coverage"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"dateofcollection": {
|
"dateofcollection": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "When OpenAIRE collected the record the last time"
|
"description": "Description of dateofcollection"
|
||||||
},
|
},
|
||||||
"description": {
|
"description": {
|
||||||
|
"description": "Description of description",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of description"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"documentationUrl": {
|
"documentationUrl": {
|
||||||
"description": "Only for results with type 'software': URL to the software documentation",
|
"description": "Description of documentationUrl",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of documentationUrl"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"embargoenddate": {
|
"embargoenddate": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Date when the embargo ends and this result turns Open Access"
|
"description": "Description of embargoenddate"
|
||||||
},
|
},
|
||||||
"format": {
|
"format": {
|
||||||
|
"description": "Description of format",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of format"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"geolocation": {
|
"geolocation": {
|
||||||
"description": "Geolocation information",
|
"description": "Description of geolocation",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"box": {
|
"box": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of box"
|
||||||
},
|
},
|
||||||
"place": {
|
"place": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of place"
|
||||||
},
|
},
|
||||||
"point": {
|
"point": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of point"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "Description of geolocation"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"id": {
|
"id": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "OpenAIRE Identifier"
|
"description": "Description of id"
|
||||||
},
|
},
|
||||||
"instance":{
|
"indicators": {
|
||||||
"description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version",
|
"type": "object",
|
||||||
"type":"array",
|
"properties": {
|
||||||
"items":{
|
"bipIndicators": {
|
||||||
"type":"object",
|
"description": "Description of bipIndicators",
|
||||||
"properties":{
|
"type": "array",
|
||||||
"accessright":{
|
"items": {
|
||||||
"type":"object",
|
"type": "object",
|
||||||
"properties":{
|
"properties": {
|
||||||
|
"clazz": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of clazz"
|
||||||
|
},
|
||||||
|
"indicator": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of indicator"
|
||||||
|
},
|
||||||
|
"score": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of score"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of bipIndicators"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"usageCounts": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"downloads": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of downloads"
|
||||||
|
},
|
||||||
|
"views": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of views"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of usageCounts"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of indicators"
|
||||||
|
},
|
||||||
|
"instance": {
|
||||||
|
"description": "Description of instance",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"accessright": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
"code": {
|
"code": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
"description": "Description of code"
|
||||||
},
|
},
|
||||||
"label": {
|
"label": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Label for the access mode"
|
"description": "Description of label"
|
||||||
},
|
},
|
||||||
"openAccessRoute":{
|
"openAccessRoute": {
|
||||||
"type":"string",
|
"type": "string",
|
||||||
"enum":[
|
"enum": [
|
||||||
"gold",
|
"gold",
|
||||||
"green",
|
"green",
|
||||||
"hybrid",
|
"hybrid",
|
||||||
"bronze"
|
"bronze"
|
||||||
],
|
],
|
||||||
"description":"The type of OpenAccess applied to the result"
|
"description": "Description of openAccessRoute"
|
||||||
},
|
},
|
||||||
"scheme": {
|
"scheme": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
"description": "Description of scheme"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of accessright"
|
||||||
|
},
|
||||||
|
"alternateIdentifier": {
|
||||||
|
"description": "Description of alternateIdentifier",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of alternateIdentifier"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"alternateIdentifier":{
|
"articleprocessingcharge": {
|
||||||
"type":"array",
|
"type": "object",
|
||||||
"items":{
|
"properties": {
|
||||||
"allOf":[
|
"amount": {
|
||||||
{
|
"type": "string",
|
||||||
"$ref":"#/definitions/ControlledField"
|
"description": "Description of amount"
|
||||||
},
|
},
|
||||||
{
|
"currency": {
|
||||||
"description":"All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs"
|
"type": "string",
|
||||||
}
|
"description": "Description of currency"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of articleprocessingcharge"
|
||||||
|
},
|
||||||
|
"license": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of license"
|
||||||
|
},
|
||||||
|
"pid": {
|
||||||
|
"description": "Description of pid",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/ResultPid"},
|
||||||
|
{"description": "Description of pid"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"articleprocessingcharge":{
|
"publicationdate": {
|
||||||
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.",
|
"type": "string",
|
||||||
"type":"object",
|
"description": "Description of publicationdate"
|
||||||
"properties":{
|
|
||||||
"amount":{
|
|
||||||
"type":"string"
|
|
||||||
},
|
|
||||||
"currency":{
|
|
||||||
"type":"string"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"license":{
|
"refereed": {
|
||||||
"type":"string"
|
"type": "string",
|
||||||
|
"description": "Description of refereed"
|
||||||
},
|
},
|
||||||
"measures":{
|
"type": {
|
||||||
|
"type": "string",
|
||||||
"type":"array",
|
"description": "Description of type"
|
||||||
"items":{
|
|
||||||
"type":"object",
|
|
||||||
"properties":{
|
|
||||||
"key":{
|
|
||||||
"type":"string",
|
|
||||||
"description":"The measure"
|
|
||||||
},
|
|
||||||
"value":{
|
|
||||||
"type":"string",
|
|
||||||
"description":"The value for the measure"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"description":"Measures computed for this instance, for example Bip!Finder ones"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"pid":{
|
"url": {
|
||||||
"description":"The set of persistent identifiers associated to this instance that have been collected from an authority for the pid type (i.e. Crossref/Datacite for doi)",
|
"description": "Description of url",
|
||||||
"type":"array",
|
"type": "array",
|
||||||
"items":{
|
"items": {
|
||||||
"allOf":[
|
"type": "string",
|
||||||
{
|
"description": "Description of url"
|
||||||
"$ref":"#/definitions/ControlledField"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"The persistent identifier associated to the result"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"publicationdate":{
|
|
||||||
"type":"string",
|
|
||||||
"description": "Date of the research product"
|
|
||||||
},
|
|
||||||
"refereed":{
|
|
||||||
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)",
|
|
||||||
"type":"string"
|
|
||||||
},
|
|
||||||
"type":{
|
|
||||||
"type":"string",
|
|
||||||
"description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
|
|
||||||
},
|
|
||||||
"url":{
|
|
||||||
"description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
|
|
||||||
"type":"array",
|
|
||||||
"items":{
|
|
||||||
"type":"string"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "Description of instance"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"isGreen": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Description of isGreen"
|
||||||
|
},
|
||||||
|
"isInDiamondJournal": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Description of isInDiamondJournal"
|
||||||
|
},
|
||||||
"language": {
|
"language": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"code": {
|
"code": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "alpha-3/ISO 639-2 code of the language"
|
"description": "Description of code"
|
||||||
},
|
},
|
||||||
"label": {
|
"label": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Language label in English"
|
"description": "Description of label"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "Description of language"
|
||||||
},
|
},
|
||||||
"lastupdatetimestamp": {
|
"lastupdatetimestamp": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "Timestamp of last update of the record in OpenAIRE"
|
"description": "Description of lastupdatetimestamp"
|
||||||
},
|
},
|
||||||
"maintitle": {
|
"maintitle": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"descriptio": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
|
"description": "Description of maintitle"
|
||||||
},
|
},
|
||||||
"subtitle": {
|
"openAccessColor": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"descriptio": "Explanatory or alternative name by which a scientific result is known."
|
"enum": [
|
||||||
|
"gold",
|
||||||
|
"hybrid",
|
||||||
|
"bronze"
|
||||||
|
],
|
||||||
|
"description": "Description of openAccessColor"
|
||||||
},
|
},
|
||||||
"originalId": {
|
"originalId": {
|
||||||
"description": "Identifiers of the record at the original sources",
|
"description": "Description of originalId",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of originalId"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"pid": {
|
"pid": {
|
||||||
"description": "Persistent identifiers of the result",
|
"description": "Description of pid",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"allOf": [
|
"allOf": [
|
||||||
{"$ref": "#/definitions/ControlledField"},
|
{"$ref": "#/definitions/ResultPid"},
|
||||||
{"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result. Note: the result will have a pid associated only if it was collected from an authority for that pid type. For example a doi will be among the pids for one result if the result metadata were collected from Crossref or Datacite. In all the other cases, the doi will be present among the alteranteIdentifiers for the result "}
|
{"description": "Description of pid"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"programmingLanguage": {
|
"programmingLanguage": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Only for results with type 'software': the programming language"
|
"description": "Description of programmingLanguage"
|
||||||
},
|
},
|
||||||
"publicationdate": {
|
"publicationdate": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date."
|
"description": "Description of publicationdate"
|
||||||
|
},
|
||||||
|
"publiclyFunded": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Description of publiclyFunded"
|
||||||
},
|
},
|
||||||
"publisher": {
|
"publisher": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource."
|
"description": "Description of publisher"
|
||||||
},
|
},
|
||||||
"size": {
|
"size": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Only for results with type 'dataset': the declared size of the dataset"
|
"description": "Description of size"
|
||||||
},
|
},
|
||||||
"source": {
|
"source": {
|
||||||
"description": "See definition of Dublin Core field dc:source",
|
"description": "Description of source",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of source"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"subjects": {
|
"subjects": {
|
||||||
"description": "Keywords associated to the result",
|
"description": "Description of subjects",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -417,32 +508,46 @@
|
||||||
"provenance": {
|
"provenance": {
|
||||||
"allOf": [
|
"allOf": [
|
||||||
{"$ref": "#/definitions/Provenance"},
|
{"$ref": "#/definitions/Provenance"},
|
||||||
{"description": "Why this subject is associated to the result"}
|
{"description": "Description of provenance"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"subject": {
|
"subject": {
|
||||||
"allOf": [
|
"type": "object",
|
||||||
{"$ref": "#/definitions/ControlledField"},
|
"properties": {
|
||||||
{"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."}
|
"scheme": {
|
||||||
]
|
"type": "string",
|
||||||
|
"description": "Description of scheme"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Description of subject"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "Description of subjects"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"subtitle": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of subtitle"
|
||||||
|
},
|
||||||
"tool": {
|
"tool": {
|
||||||
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product",
|
"description": "Description of tool",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Description of tool"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"type": {
|
"type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)"
|
"description": "Description of type"
|
||||||
},
|
},
|
||||||
"version": {
|
"version": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Version of the result"
|
"description": "Description of version"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -9,8 +9,9 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
|
||||||
import com.github.victools.jsonschema.generator.*;
|
import com.github.victools.jsonschema.generator.*;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.ExecCreateSchemas;
|
import eu.dnetlib.dhp.ExecCreateSchemas;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Result;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
|
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.*;
|
||||||
|
|
||||||
//@Disabled
|
//@Disabled
|
||||||
class GenerateJsonSchema {
|
class GenerateJsonSchema {
|
||||||
|
@ -24,7 +25,7 @@ class GenerateJsonSchema {
|
||||||
configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
|
configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
|
||||||
SchemaGeneratorConfig config = configBuilder.build();
|
SchemaGeneratorConfig config = configBuilder.build();
|
||||||
SchemaGenerator generator = new SchemaGenerator(config);
|
SchemaGenerator generator = new SchemaGenerator(config);
|
||||||
JsonNode jsonSchema = generator.generateSchema(GraphResult.class);
|
JsonNode jsonSchema = generator.generateSchema(CommunityResult.class);
|
||||||
|
|
||||||
System.out.println(jsonSchema.toString());
|
System.out.println(jsonSchema.toString());
|
||||||
}
|
}
|
||||||
|
@ -41,12 +42,11 @@ class GenerateJsonSchema {
|
||||||
.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
|
.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
|
||||||
SchemaGeneratorConfig config = configBuilder.build();
|
SchemaGeneratorConfig config = configBuilder.build();
|
||||||
SchemaGenerator generator = new SchemaGenerator(config);
|
SchemaGenerator generator = new SchemaGenerator(config);
|
||||||
JsonNode jsonSchema = generator.generateSchema(GraphResult.class);
|
JsonNode jsonSchema = generator.generateSchema(Result.class);
|
||||||
|
|
||||||
System.out.println(jsonSchema.toString());
|
System.out.println(jsonSchema.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void generateJsonSchema3() throws IOException {
|
void generateJsonSchema3() throws IOException {
|
||||||
|
|
||||||
|
|
21
dump/pom.xml
21
dump/pom.xml
|
@ -53,7 +53,26 @@
|
||||||
<artifactId>dump-schema</artifactId>
|
<artifactId>dump-schema</artifactId>
|
||||||
<version>1.2.5-SNAPSHOT</version>
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>api</artifactId>
|
||||||
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.github.classgraph</groupId>
|
||||||
|
<artifactId>classgraph</artifactId>
|
||||||
|
<version>4.8.71</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>api</artifactId>
|
||||||
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,201 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.common;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||||
|
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
public class MakeTarArchive implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(MakeTarArchive.class);
|
||||||
|
private static int index = 1;
|
||||||
|
private static String prevname = new String();
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
MakeTarArchive.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/common/input_maketar_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("hdfsPath");
|
||||||
|
log.info("hdfsPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
|
log.info("nameNode: {}", hdfsNameNode);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("input path : {}", inputPath);
|
||||||
|
|
||||||
|
final int gBperSplit = Optional
|
||||||
|
.ofNullable(parser.get("splitSize"))
|
||||||
|
.map(Integer::valueOf)
|
||||||
|
.orElse(10);
|
||||||
|
|
||||||
|
final boolean rename = Optional
|
||||||
|
.ofNullable(parser.get("rename"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.FALSE);
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
||||||
|
makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit, rename);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
|
||||||
|
throws IOException {
|
||||||
|
makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit,
|
||||||
|
boolean rename)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
RemoteIterator<LocatedFileStatus> dirIterator = fileSystem.listLocatedStatus(new Path(inputPath));
|
||||||
|
|
||||||
|
while (dirIterator.hasNext()) {
|
||||||
|
LocatedFileStatus fileStatus = dirIterator.next();
|
||||||
|
|
||||||
|
Path p = fileStatus.getPath();
|
||||||
|
String pathString = p.toString();
|
||||||
|
String entity = pathString.substring(pathString.lastIndexOf("/") + 1);
|
||||||
|
|
||||||
|
MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit, rename);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
|
||||||
|
Path hdfsWritePath = new Path(outputPath);
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
|
||||||
|
}
|
||||||
|
return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dirName,
|
||||||
|
boolean rename)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
Path hdfsWritePath = new Path(outputPath);
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
|
||||||
|
}
|
||||||
|
try (TarArchiveOutputStream ar = new TarArchiveOutputStream(
|
||||||
|
fileSystem.create(hdfsWritePath).getWrappedStream())) {
|
||||||
|
|
||||||
|
RemoteIterator<LocatedFileStatus> iterator = fileSystem
|
||||||
|
.listFiles(
|
||||||
|
new Path(inputPath), true);
|
||||||
|
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
writeCurrentFile(fileSystem, dirName, iterator, ar, 0, rename);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
|
||||||
|
int gBperSplit, boolean rename) throws IOException {
|
||||||
|
final long bytesPerSplit = 1024L * 1024L * 1024L * gBperSplit;
|
||||||
|
|
||||||
|
long sourceSize = fileSystem.getContentSummary(new Path(inputPath)).getSpaceConsumed();
|
||||||
|
|
||||||
|
if (sourceSize < bytesPerSplit) {
|
||||||
|
write(fileSystem, inputPath, outputPath + ".tar", dir_name, rename);
|
||||||
|
} else {
|
||||||
|
int partNum = 0;
|
||||||
|
|
||||||
|
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||||
|
.listFiles(
|
||||||
|
new Path(inputPath), true);
|
||||||
|
boolean next = fileStatusListIterator.hasNext();
|
||||||
|
while (next) {
|
||||||
|
try (TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar")) {
|
||||||
|
|
||||||
|
long currentSize = 0;
|
||||||
|
while (next && currentSize < bytesPerSplit) {
|
||||||
|
currentSize = writeCurrentFile(
|
||||||
|
fileSystem, dir_name, fileStatusListIterator, ar, currentSize, rename);
|
||||||
|
next = fileStatusListIterator.hasNext();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
partNum += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static long writeCurrentFile(FileSystem fileSystem, String dirName,
|
||||||
|
RemoteIterator<LocatedFileStatus> fileStatusListIterator,
|
||||||
|
TarArchiveOutputStream ar, long currentSize, boolean rename) throws IOException {
|
||||||
|
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
||||||
|
|
||||||
|
Path p = fileStatus.getPath();
|
||||||
|
String pString = p.toString();
|
||||||
|
if (!pString.endsWith("_SUCCESS")) {
|
||||||
|
String name = pString.substring(pString.lastIndexOf("/") + 1);
|
||||||
|
// if (name.startsWith("part-") & name.length() > 10) {
|
||||||
|
// String tmp = name.substring(0, 10);
|
||||||
|
// if (prevname.equalsIgnoreCase(tmp)) {
|
||||||
|
// tmp = tmp + "_" + index;
|
||||||
|
// index += 1;
|
||||||
|
// } else {
|
||||||
|
// prevname = tmp;
|
||||||
|
// index = 1;
|
||||||
|
// }
|
||||||
|
// if (name.contains(".")) {
|
||||||
|
// tmp += name.substring(name.indexOf("."));
|
||||||
|
// }
|
||||||
|
// name = tmp;
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
if (rename) {
|
||||||
|
if (name.endsWith(".txt.gz"))
|
||||||
|
name = name.replace(".txt.gz", ".json.gz");
|
||||||
|
}
|
||||||
|
|
||||||
|
TarArchiveEntry entry = new TarArchiveEntry(dirName + "/" + name);
|
||||||
|
entry.setSize(fileStatus.getLen());
|
||||||
|
currentSize += fileStatus.getLen();
|
||||||
|
ar.putArchiveEntry(entry);
|
||||||
|
|
||||||
|
InputStream is = fileSystem.open(fileStatus.getPath());
|
||||||
|
|
||||||
|
BufferedInputStream bis = new BufferedInputStream(is);
|
||||||
|
|
||||||
|
int count;
|
||||||
|
byte[] data = new byte[1024];
|
||||||
|
while ((count = bis.read(data, 0, data.length)) != -1) {
|
||||||
|
ar.write(data, 0, count);
|
||||||
|
}
|
||||||
|
bis.close();
|
||||||
|
ar.closeArchiveEntry();
|
||||||
|
|
||||||
|
}
|
||||||
|
return currentSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -27,6 +27,14 @@ public class Constants {
|
||||||
|
|
||||||
public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
|
public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
|
||||||
|
|
||||||
|
public static final String USAGE_COUNT_DOWNLOADS = "downloads";
|
||||||
|
public static final String USAGE_COUNT_VIEWS = "views";
|
||||||
|
public static final String IMPACT_POPULARITY = "popularity";
|
||||||
|
public static final String IMPACT_POPULARITY_ALT = "popularity_alt";
|
||||||
|
public static final String IMPACT_INFLUENCE = "influence";
|
||||||
|
public static final String IMPACT_INFLUENCE_ALT = "influence_alt";
|
||||||
|
public static final String IMPACT_IMPULSE = "impulse";
|
||||||
|
|
||||||
static {
|
static {
|
||||||
ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
|
ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
|
||||||
ACCESS_RIGHTS_COAR_MAP.put("RESTRICTED", "c_16ec");
|
ACCESS_RIGHTS_COAR_MAP.put("RESTRICTED", "c_16ec");
|
||||||
|
|
|
@ -1,111 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
import org.apache.spark.sql.SaveMode;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
|
||||||
import eu.dnetlib.dhp.oa.model.Result;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* It fires the execution of the actual dump for result entities. If the dump is for RC/RI products its checks for each
|
|
||||||
* result its belongingess to at least one RC/RI before "asking" for its mapping.
|
|
||||||
*/
|
|
||||||
public class DumpProducts implements Serializable {
|
|
||||||
|
|
||||||
public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
|
|
||||||
Class<? extends OafEntity> inputClazz,
|
|
||||||
Class<? extends Result> outputClazz,
|
|
||||||
String dumpType) {
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> {
|
|
||||||
Utils.removeOutputDir(spark, outputPath);
|
|
||||||
execDump(
|
|
||||||
spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
public static <I extends OafEntity, O extends Result> void execDump(
|
|
||||||
SparkSession spark,
|
|
||||||
String inputPath,
|
|
||||||
String outputPath,
|
|
||||||
String communityMapPath,
|
|
||||||
Class<I> inputClazz,
|
|
||||||
Class<O> outputClazz,
|
|
||||||
String dumpType) {
|
|
||||||
|
|
||||||
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
|
|
||||||
|
|
||||||
Utils
|
|
||||||
.readPath(spark, inputPath, inputClazz)
|
|
||||||
.map((MapFunction<I, O>) value -> execMap(value, communityMap, dumpType), Encoders.bean(outputClazz))
|
|
||||||
.filter((FilterFunction<O>) value -> value != null)
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(outputPath);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <I extends OafEntity, O extends Result> O execMap(I value,
|
|
||||||
CommunityMap communityMap,
|
|
||||||
String dumpType) throws NoAvailableEntityTypeException, CardinalityTooHighException {
|
|
||||||
|
|
||||||
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
|
|
||||||
if (odInfo.isPresent()) {
|
|
||||||
if (odInfo.get().getDeletedbyinference() || odInfo.get().getInvisible()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Constants.DUMPTYPE.COMMUNITY.getType().equals(dumpType)) {
|
|
||||||
Set<String> communities = communityMap.keySet();
|
|
||||||
|
|
||||||
Optional<List<Context>> inputContext = Optional
|
|
||||||
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
|
|
||||||
if (!inputContext.isPresent()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
List<String> toDumpFor = inputContext.get().stream().map(c -> {
|
|
||||||
if (communities.contains(c.getId())) {
|
|
||||||
return c.getId();
|
|
||||||
}
|
|
||||||
if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
|
|
||||||
return c.getId().substring(0, c.getId().indexOf("::"));
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}).filter(Objects::nonNull).collect(Collectors.toList());
|
|
||||||
if (toDumpFor.isEmpty()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (O) ResultMapper.map(value, communityMap, dumpType);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -15,7 +15,7 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.MakeTarArchive;
|
import eu.dnetlib.dhp.oa.common.MakeTarArchive;
|
||||||
|
|
||||||
public class MakeTar implements Serializable {
|
public class MakeTar implements Serializable {
|
||||||
|
|
||||||
|
@ -66,7 +66,7 @@ public class MakeTar implements Serializable {
|
||||||
String pathString = p.toString();
|
String pathString = p.toString();
|
||||||
String entity = pathString.substring(pathString.lastIndexOf("/") + 1);
|
String entity = pathString.substring(pathString.lastIndexOf("/") + 1);
|
||||||
|
|
||||||
MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit);
|
MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,74 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump;
|
|
||||||
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.dom4j.Document;
|
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
import org.dom4j.Element;
|
|
||||||
import org.dom4j.io.SAXReader;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
|
|
||||||
public class QueryInformationSystem {
|
|
||||||
|
|
||||||
private ISLookUpService isLookUp;
|
|
||||||
|
|
||||||
private static final String XQUERY_ALL = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
|
||||||
+
|
|
||||||
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
|
||||||
" and ($x//context/param[./@name = 'status']/text() = 'all') "
|
|
||||||
+
|
|
||||||
" return " +
|
|
||||||
"<community> " +
|
|
||||||
"{$x//CONFIGURATION/context/@id}" +
|
|
||||||
"{$x//CONFIGURATION/context/@label}" +
|
|
||||||
"</community>";
|
|
||||||
|
|
||||||
private static final String XQUERY_CI = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
|
||||||
+
|
|
||||||
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
|
||||||
" and $x//CONFIGURATION/context[./@id=%s] "
|
|
||||||
+
|
|
||||||
" return " +
|
|
||||||
"<community> " +
|
|
||||||
"{$x//CONFIGURATION/context/@id}" +
|
|
||||||
"{$x//CONFIGURATION/context/@label}" +
|
|
||||||
"</community>";
|
|
||||||
|
|
||||||
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
|
|
||||||
throws ISLookUpException, DocumentException, SAXException {
|
|
||||||
if (singleCommunity)
|
|
||||||
return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'")));
|
|
||||||
return getMap(isLookUp.quickSearchProfile(XQUERY_ALL));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public ISLookUpService getIsLookUp() {
|
|
||||||
return isLookUp;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setIsLookUp(ISLookUpService isLookUpService) {
|
|
||||||
this.isLookUp = isLookUpService;
|
|
||||||
}
|
|
||||||
|
|
||||||
private CommunityMap getMap(List<String> communityMap) throws DocumentException, SAXException {
|
|
||||||
final CommunityMap map = new CommunityMap();
|
|
||||||
|
|
||||||
for (String xml : communityMap) {
|
|
||||||
final Document doc;
|
|
||||||
final SAXReader reader = new SAXReader();
|
|
||||||
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
|
||||||
doc = reader.read(new StringReader(xml));
|
|
||||||
Element root = doc.getRootElement();
|
|
||||||
map.put(root.attribute("id").getValue(), root.attribute("label").getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,13 +1,15 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump;
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Constants.*;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||||
|
@ -16,9 +18,10 @@ import eu.dnetlib.dhp.oa.model.AccessRight;
|
||||||
import eu.dnetlib.dhp.oa.model.Author;
|
import eu.dnetlib.dhp.oa.model.Author;
|
||||||
import eu.dnetlib.dhp.oa.model.GeoLocation;
|
import eu.dnetlib.dhp.oa.model.GeoLocation;
|
||||||
import eu.dnetlib.dhp.oa.model.Instance;
|
import eu.dnetlib.dhp.oa.model.Instance;
|
||||||
import eu.dnetlib.dhp.oa.model.Measure;
|
import eu.dnetlib.dhp.oa.model.OpenAccessColor;
|
||||||
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
|
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
|
||||||
import eu.dnetlib.dhp.oa.model.Result;
|
import eu.dnetlib.dhp.oa.model.Result;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Subject;
|
||||||
import eu.dnetlib.dhp.oa.model.community.CfHbKeyValue;
|
import eu.dnetlib.dhp.oa.model.community.CfHbKeyValue;
|
||||||
import eu.dnetlib.dhp.oa.model.community.CommunityInstance;
|
import eu.dnetlib.dhp.oa.model.community.CommunityInstance;
|
||||||
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
||||||
|
@ -28,7 +31,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class ResultMapper implements Serializable {
|
public class ResultMapper implements Serializable {
|
||||||
private static final Logger log = LoggerFactory.getLogger(ResultMapper.class);
|
private static final String NULL = "null";
|
||||||
|
|
||||||
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
|
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
|
||||||
E in, Map<String, String> communityMap, String dumpType)
|
E in, Map<String, String> communityMap, String dumpType)
|
||||||
|
@ -46,262 +49,39 @@ public class ResultMapper implements Serializable {
|
||||||
if (ort.isPresent()) {
|
if (ort.isPresent()) {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
addTypeSpecificInformation(out, input, ort);
|
addTypeSpecificInformation(out, input, ort.get());
|
||||||
|
mapAuthor(out, input);
|
||||||
Optional
|
mapAccessRight(out, input);
|
||||||
.ofNullable(input.getAuthor())
|
mapContributor(out, input);
|
||||||
.ifPresent(
|
mapCountry(out, input);
|
||||||
ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList())));
|
mapCoverage(out, input);
|
||||||
|
|
||||||
// I do not map Access Right UNKNOWN or OTHER
|
|
||||||
|
|
||||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
|
|
||||||
if (oar.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(oar.get().getClassid())) {
|
|
||||||
String code = Constants.ACCESS_RIGHTS_COAR_MAP.get(oar.get().getClassid());
|
|
||||||
out
|
|
||||||
.setBestaccessright(
|
|
||||||
|
|
||||||
BestAccessRight
|
|
||||||
.newInstance(
|
|
||||||
code,
|
|
||||||
Constants.COAR_CODE_LABEL_MAP.get(code),
|
|
||||||
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
|
||||||
}
|
|
||||||
|
|
||||||
final List<String> contributorList = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getContributor())
|
|
||||||
.ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
|
|
||||||
out.setContributor(contributorList);
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getCountry())
|
|
||||||
.ifPresent(
|
|
||||||
value -> out
|
|
||||||
.setCountry(
|
|
||||||
value
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
c -> {
|
|
||||||
if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
ResultCountry country = new ResultCountry();
|
|
||||||
country.setCode(c.getClassid());
|
|
||||||
country.setLabel(c.getClassname());
|
|
||||||
Optional
|
|
||||||
.ofNullable(c.getDataInfo())
|
|
||||||
.ifPresent(
|
|
||||||
provenance -> country
|
|
||||||
.setProvenance(
|
|
||||||
Provenance
|
|
||||||
.newInstance(
|
|
||||||
provenance
|
|
||||||
.getProvenanceaction()
|
|
||||||
.getClassname(),
|
|
||||||
c.getDataInfo().getTrust())));
|
|
||||||
return country;
|
|
||||||
})
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.collect(Collectors.toList())));
|
|
||||||
|
|
||||||
final List<String> coverageList = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getCoverage())
|
|
||||||
.ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
|
|
||||||
out.setCoverage(coverageList);
|
|
||||||
|
|
||||||
out.setDateofcollection(input.getDateofcollection());
|
out.setDateofcollection(input.getDateofcollection());
|
||||||
|
out.setGreen(input.getIsGreen());
|
||||||
final List<String> descriptionList = new ArrayList<>();
|
out.setInDiamondJournal(input.getIsInDiamondJournal());
|
||||||
Optional
|
out.setPubliclyFunded(input.getPubliclyFunded());
|
||||||
.ofNullable(input.getDescription())
|
mapOpenAccessColor(out, input);
|
||||||
.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
|
mapDescription(out, input);
|
||||||
out.setDescription(descriptionList);
|
mapEmbargo(out, input);
|
||||||
Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
|
mapFormat(out, input);
|
||||||
if (oStr.isPresent()) {
|
out.setId(getEntityId(input.getId(), ENTITY_ID_SEPARATOR));
|
||||||
out.setEmbargoenddate(oStr.get().getValue());
|
mapOriginalId(out, input);
|
||||||
}
|
mapInstance(dumpType, out, input);
|
||||||
|
mapLanguage(out, input);
|
||||||
final List<String> formatList = new ArrayList<>();
|
mapLastUpdateTimestamp(out, input);
|
||||||
Optional
|
mapTitle(out, input);
|
||||||
.ofNullable(input.getFormat())
|
mapPid(out, input);
|
||||||
.ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
|
mapDateOfAcceptance(out, input);
|
||||||
out.setFormat(formatList);
|
mapPublisher(out, input);
|
||||||
out.setId(input.getId());
|
mapSource(out, input);
|
||||||
out.setOriginalId(new ArrayList<>());
|
mapSubject(out, input);
|
||||||
Optional
|
|
||||||
.ofNullable(input.getOriginalId())
|
|
||||||
.ifPresent(
|
|
||||||
v -> out
|
|
||||||
.setOriginalId(
|
|
||||||
input
|
|
||||||
.getOriginalId()
|
|
||||||
.stream()
|
|
||||||
.filter(s -> !s.startsWith("50|"))
|
|
||||||
.collect(Collectors.toList())));
|
|
||||||
|
|
||||||
Optional<List<eu.dnetlib.dhp.schema.oaf.Instance>> oInst = Optional
|
|
||||||
.ofNullable(input.getInstance());
|
|
||||||
|
|
||||||
if (oInst.isPresent()) {
|
|
||||||
if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
|
|
||||||
((GraphResult) out)
|
|
||||||
.setInstance(
|
|
||||||
oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList()));
|
|
||||||
} else {
|
|
||||||
((CommunityResult) out)
|
|
||||||
.setInstance(
|
|
||||||
oInst
|
|
||||||
.get()
|
|
||||||
.stream()
|
|
||||||
.map(ResultMapper::getCommunityInstance)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
|
|
||||||
if (oL.isPresent()) {
|
|
||||||
eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
|
|
||||||
out.setLanguage(Language.newInstance(language.getClassid(), language.getClassname()));
|
|
||||||
}
|
|
||||||
Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
|
|
||||||
if (oLong.isPresent()) {
|
|
||||||
out.setLastupdatetimestamp(oLong.get());
|
|
||||||
}
|
|
||||||
Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
|
|
||||||
if (otitle.isPresent()) {
|
|
||||||
List<StructuredProperty> iTitle = otitle
|
|
||||||
.get()
|
|
||||||
.stream()
|
|
||||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
if (!iTitle.isEmpty()) {
|
|
||||||
out.setMaintitle(iTitle.get(0).getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
iTitle = otitle
|
|
||||||
.get()
|
|
||||||
.stream()
|
|
||||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
if (!iTitle.isEmpty()) {
|
|
||||||
out.setSubtitle(iTitle.get(0).getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getPid())
|
|
||||||
.ifPresent(
|
|
||||||
value -> out
|
|
||||||
.setPid(
|
|
||||||
value
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
p -> ResultPid
|
|
||||||
.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
|
||||||
.collect(Collectors.toList())));
|
|
||||||
|
|
||||||
oStr = Optional.ofNullable(input.getDateofacceptance());
|
|
||||||
if (oStr.isPresent()) {
|
|
||||||
out.setPublicationdate(oStr.get().getValue());
|
|
||||||
}
|
|
||||||
oStr = Optional.ofNullable(input.getPublisher());
|
|
||||||
if (oStr.isPresent()) {
|
|
||||||
out.setPublisher(oStr.get().getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getSource())
|
|
||||||
.ifPresent(
|
|
||||||
value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList())));
|
|
||||||
|
|
||||||
List<Subject> subjectList = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getSubject())
|
|
||||||
.ifPresent(
|
|
||||||
value -> value
|
|
||||||
.forEach(s -> subjectList.add(getSubject(s))));
|
|
||||||
|
|
||||||
out.setSubjects(subjectList);
|
|
||||||
|
|
||||||
out.setType(input.getResulttype().getClassid());
|
out.setType(input.getResulttype().getClassid());
|
||||||
|
mapMeasure(out, input);
|
||||||
if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
|
if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
|
||||||
((CommunityResult) out)
|
mapCollectedfrom((CommunityResult) out, input);
|
||||||
.setCollectedfrom(
|
mapContext(communityMap, (CommunityResult) out, input);
|
||||||
input
|
|
||||||
.getCollectedfrom()
|
|
||||||
.stream()
|
|
||||||
.map(cf -> CfHbKeyValue.newInstance(cf.getKey(), cf.getValue()))
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
|
|
||||||
Set<String> communities = communityMap.keySet();
|
|
||||||
List<Context> contextList = Optional
|
|
||||||
.ofNullable(
|
|
||||||
input
|
|
||||||
.getContext())
|
|
||||||
.map(
|
|
||||||
value -> value
|
|
||||||
.stream()
|
|
||||||
.map(c -> {
|
|
||||||
String communityId = c.getId();
|
|
||||||
if (communityId.contains("::")) {
|
|
||||||
communityId = communityId.substring(0, communityId.indexOf("::"));
|
|
||||||
}
|
|
||||||
if (communities.contains(communityId)) {
|
|
||||||
Context context = new Context();
|
|
||||||
context.setCode(communityId);
|
|
||||||
context.setLabel(communityMap.get(communityId));
|
|
||||||
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
|
|
||||||
if (dataInfo.isPresent()) {
|
|
||||||
List<Provenance> provenance = new ArrayList<>();
|
|
||||||
provenance
|
|
||||||
.addAll(
|
|
||||||
dataInfo
|
|
||||||
.get()
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
di -> Optional
|
|
||||||
.ofNullable(di.getProvenanceaction())
|
|
||||||
.map(
|
|
||||||
provenanceaction -> Provenance
|
|
||||||
.newInstance(
|
|
||||||
provenanceaction.getClassname(),
|
|
||||||
di.getTrust()))
|
|
||||||
.orElse(null))
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.collect(Collectors.toSet()));
|
|
||||||
|
|
||||||
try {
|
|
||||||
context.setProvenance(getUniqueProvenance(provenance));
|
|
||||||
} catch (NoAvailableEntityTypeException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return context;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
})
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.collect(Collectors.toList()))
|
|
||||||
.orElse(new ArrayList<>());
|
|
||||||
|
|
||||||
if (!contextList.isEmpty()) {
|
|
||||||
Set<Integer> hashValue = new HashSet<>();
|
|
||||||
List<Context> remainigContext = new ArrayList<>();
|
|
||||||
contextList.forEach(c -> {
|
|
||||||
if (!hashValue.contains(c.hashCode())) {
|
|
||||||
remainigContext.add(c);
|
|
||||||
hashValue.add(c.hashCode());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
((CommunityResult) out).setContext(remainigContext);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (ClassCastException cce) {
|
} catch (ClassCastException cce) {
|
||||||
return out;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -309,9 +89,343 @@ public class ResultMapper implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void mapOpenAccessColor(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
if (Optional.ofNullable(input.getOpenAccessColor()).isPresent())
|
||||||
|
switch (input.getOpenAccessColor()) {
|
||||||
|
case bronze:
|
||||||
|
out.setOpenAccessColor(OpenAccessColor.bronze);
|
||||||
|
break;
|
||||||
|
case gold:
|
||||||
|
out.setOpenAccessColor(OpenAccessColor.gold);
|
||||||
|
break;
|
||||||
|
case hybrid:
|
||||||
|
out.setOpenAccessColor(OpenAccessColor.hybrid);
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapContext(Map<String, String> communityMap, CommunityResult out,
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Set<String> communities = communityMap.keySet();
|
||||||
|
List<Context> contextList = Optional
|
||||||
|
.ofNullable(
|
||||||
|
input
|
||||||
|
.getContext())
|
||||||
|
.map(
|
||||||
|
value -> value
|
||||||
|
.stream()
|
||||||
|
.map(c -> {
|
||||||
|
String communityId = c.getId();
|
||||||
|
if (communityId.contains("::")) {
|
||||||
|
communityId = communityId.substring(0, communityId.indexOf("::"));
|
||||||
|
}
|
||||||
|
if (communities.contains(communityId)) {
|
||||||
|
Context context = new Context();
|
||||||
|
context.setCode(communityId);
|
||||||
|
context.setLabel(communityMap.get(communityId));
|
||||||
|
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
|
||||||
|
if (dataInfo.isPresent()) {
|
||||||
|
List<Provenance> provenance = new ArrayList<>();
|
||||||
|
provenance
|
||||||
|
.addAll(
|
||||||
|
dataInfo
|
||||||
|
.get()
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
di -> Optional
|
||||||
|
.ofNullable(di.getProvenanceaction())
|
||||||
|
.map(
|
||||||
|
provenanceaction -> Provenance
|
||||||
|
.newInstance(
|
||||||
|
provenanceaction.getClassname(),
|
||||||
|
di.getTrust()))
|
||||||
|
.orElse(null))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toSet()));
|
||||||
|
|
||||||
|
try {
|
||||||
|
context.setProvenance(getUniqueProvenance(provenance));
|
||||||
|
} catch (NoAvailableEntityTypeException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return context;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
})
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(new ArrayList<>());
|
||||||
|
|
||||||
|
if (!contextList.isEmpty()) {
|
||||||
|
Set<Integer> hashValue = new HashSet<>();
|
||||||
|
List<Context> remainigContext = new ArrayList<>();
|
||||||
|
contextList.forEach(c -> {
|
||||||
|
if (!hashValue.contains(c.hashCode())) {
|
||||||
|
remainigContext.add(c);
|
||||||
|
hashValue.add(c.hashCode());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
out.setContext(remainigContext);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapCollectedfrom(CommunityResult out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
out
|
||||||
|
.setCollectedfrom(
|
||||||
|
input
|
||||||
|
.getCollectedfrom()
|
||||||
|
.stream()
|
||||||
|
.map(cf -> CfHbKeyValue.newInstance(getEntityId(cf.getKey(), ENTITY_ID_SEPARATOR), cf.getValue()))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapMeasure(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
if (Optional.ofNullable(input.getMeasures()).isPresent() && input.getMeasures().size() > 0) {
|
||||||
|
|
||||||
|
out.setIndicators(Utils.getIndicator(input.getMeasures()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapSubject(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
List<Subject> subjectList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getSubject())
|
||||||
|
.ifPresent(
|
||||||
|
value -> value
|
||||||
|
.stream()
|
||||||
|
// .filter(
|
||||||
|
// s -> !((s.getQualifier().getClassid().equalsIgnoreCase("fos") &&
|
||||||
|
// Optional.ofNullable(s.getDataInfo()).isPresent()
|
||||||
|
// && Optional.ofNullable(s.getDataInfo().getProvenanceaction()).isPresent() &&
|
||||||
|
// s.getDataInfo().getProvenanceaction().getClassid().equalsIgnoreCase("subject:fos"))
|
||||||
|
// ||
|
||||||
|
// (s.getQualifier().getClassid().equalsIgnoreCase("sdg") &&
|
||||||
|
// Optional.ofNullable(s.getDataInfo()).isPresent()
|
||||||
|
// && Optional.ofNullable(s.getDataInfo().getProvenanceaction()).isPresent() &&
|
||||||
|
// s
|
||||||
|
// .getDataInfo()
|
||||||
|
// .getProvenanceaction()
|
||||||
|
// .getClassid()
|
||||||
|
// .equalsIgnoreCase("subject:sdg"))))
|
||||||
|
.filter(s -> !s.getValue().equalsIgnoreCase(NULL))
|
||||||
|
.forEach(s -> subjectList.add(getSubject(s))));
|
||||||
|
|
||||||
|
out.setSubjects(subjectList);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapSource(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getSource())
|
||||||
|
.ifPresent(
|
||||||
|
value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList())));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapPublisher(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional<Field<String>> oStr;
|
||||||
|
oStr = Optional.ofNullable(input.getPublisher());
|
||||||
|
if (oStr.isPresent()) {
|
||||||
|
out.setPublisher(oStr.get().getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapDateOfAcceptance(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional<Field<String>> oStr;
|
||||||
|
oStr = Optional.ofNullable(input.getDateofacceptance());
|
||||||
|
if (oStr.isPresent()) {
|
||||||
|
out.setPublicationdate(oStr.get().getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapPid(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getPid())
|
||||||
|
.ifPresent(
|
||||||
|
value -> out
|
||||||
|
.setPid(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
p -> ResultPid
|
||||||
|
.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapTitle(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
|
||||||
|
if (otitle.isPresent()) {
|
||||||
|
List<StructuredProperty> iTitle = otitle
|
||||||
|
.get()
|
||||||
|
.stream()
|
||||||
|
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
if (!iTitle.isEmpty()) {
|
||||||
|
out.setMaintitle(iTitle.get(0).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
iTitle = otitle
|
||||||
|
.get()
|
||||||
|
.stream()
|
||||||
|
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
if (!iTitle.isEmpty()) {
|
||||||
|
out.setSubtitle(iTitle.get(0).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapLastUpdateTimestamp(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
|
||||||
|
if (oLong.isPresent()) {
|
||||||
|
out.setLastupdatetimestamp(oLong.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapLanguage(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional<Qualifier> oL = Optional.ofNullable(input.getLanguage());
|
||||||
|
if (oL.isPresent()) {
|
||||||
|
Qualifier language = oL.get();
|
||||||
|
out.setLanguage(Language.newInstance(language.getClassid(), language.getClassname()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapInstance(String dumpType, Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional<List<eu.dnetlib.dhp.schema.oaf.Instance>> oInst = Optional
|
||||||
|
.ofNullable(input.getInstance());
|
||||||
|
|
||||||
|
if (oInst.isPresent()) {
|
||||||
|
if (DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
|
||||||
|
((GraphResult) out)
|
||||||
|
.setInstance(
|
||||||
|
oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList()));
|
||||||
|
} else {
|
||||||
|
((CommunityResult) out)
|
||||||
|
.setInstance(
|
||||||
|
oInst
|
||||||
|
.get()
|
||||||
|
.stream()
|
||||||
|
.map(ResultMapper::getCommunityInstance)
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapOriginalId(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
out.setOriginalId(new ArrayList<>());
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getOriginalId())
|
||||||
|
.ifPresent(
|
||||||
|
v -> out
|
||||||
|
.setOriginalId(
|
||||||
|
input
|
||||||
|
.getOriginalId()
|
||||||
|
.stream()
|
||||||
|
.filter(s -> !s.startsWith("50|"))
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapFormat(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
final List<String> formatList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getFormat())
|
||||||
|
.ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
|
||||||
|
out.setFormat(formatList);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapEmbargo(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
|
||||||
|
if (oStr.isPresent()) {
|
||||||
|
out.setEmbargoenddate(oStr.get().getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapDescription(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
final List<String> descriptionList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getDescription())
|
||||||
|
.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
|
||||||
|
out.setDescription(descriptionList);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapCoverage(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
final List<String> coverageList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getCoverage())
|
||||||
|
.ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
|
||||||
|
out.setCoverage(coverageList);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapCountry(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getCountry())
|
||||||
|
.ifPresent(
|
||||||
|
value -> out
|
||||||
|
.setCountry(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
c -> {
|
||||||
|
if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
ResultCountry country = new ResultCountry();
|
||||||
|
country.setCode(c.getClassid());
|
||||||
|
country.setLabel(c.getClassname());
|
||||||
|
Optional
|
||||||
|
.ofNullable(c.getDataInfo())
|
||||||
|
.ifPresent(
|
||||||
|
provenance -> country
|
||||||
|
.setProvenance(
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
provenance
|
||||||
|
.getProvenanceaction()
|
||||||
|
.getClassname(),
|
||||||
|
c.getDataInfo().getTrust())));
|
||||||
|
return country;
|
||||||
|
})
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapContributor(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
final List<String> contributorList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getContributor())
|
||||||
|
.ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
|
||||||
|
out.setContributor(contributorList);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapAccessRight(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
// I do not map Access Right UNKNOWN or OTHER
|
||||||
|
|
||||||
|
Optional<Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
|
||||||
|
if (oar.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(oar.get().getClassid())) {
|
||||||
|
String code = Constants.ACCESS_RIGHTS_COAR_MAP.get(oar.get().getClassid());
|
||||||
|
out
|
||||||
|
.setBestaccessright(
|
||||||
|
|
||||||
|
BestAccessRight
|
||||||
|
.newInstance(
|
||||||
|
code,
|
||||||
|
Constants.COAR_CODE_LABEL_MAP.get(code),
|
||||||
|
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mapAuthor(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getAuthor())
|
||||||
|
.ifPresent(
|
||||||
|
ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList())));
|
||||||
|
}
|
||||||
|
|
||||||
private static void addTypeSpecificInformation(Result out, eu.dnetlib.dhp.schema.oaf.Result input,
|
private static void addTypeSpecificInformation(Result out, eu.dnetlib.dhp.schema.oaf.Result input,
|
||||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort) throws NoAvailableEntityTypeException {
|
eu.dnetlib.dhp.schema.oaf.Qualifier ort) throws NoAvailableEntityTypeException {
|
||||||
switch (ort.get().getClassid()) {
|
switch (ort.getClassid()) {
|
||||||
case "publication":
|
case "publication":
|
||||||
Optional<Journal> journal = Optional
|
Optional<Journal> journal = Optional
|
||||||
.ofNullable(((Publication) input).getJournal());
|
.ofNullable(((Publication) input).getJournal());
|
||||||
|
@ -425,14 +539,24 @@ public class ResultMapper implements Serializable {
|
||||||
|
|
||||||
setCommonValue(i, instance);
|
setCommonValue(i, instance);
|
||||||
|
|
||||||
instance
|
if (Optional.ofNullable(i.getCollectedfrom()).isPresent() &&
|
||||||
.setCollectedfrom(
|
Optional.ofNullable(i.getCollectedfrom().getKey()).isPresent() &&
|
||||||
CfHbKeyValue
|
StringUtils.isNotBlank(i.getCollectedfrom().getKey()))
|
||||||
.newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));
|
instance
|
||||||
|
.setCollectedfrom(
|
||||||
|
CfHbKeyValue
|
||||||
|
.newInstance(
|
||||||
|
getEntityId(i.getCollectedfrom().getKey(), ENTITY_ID_SEPARATOR),
|
||||||
|
i.getCollectedfrom().getValue()));
|
||||||
|
|
||||||
instance
|
if (Optional.ofNullable(i.getHostedby()).isPresent() &&
|
||||||
.setHostedby(
|
Optional.ofNullable(i.getHostedby().getKey()).isPresent() &&
|
||||||
CfHbKeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));
|
StringUtils.isNotBlank(i.getHostedby().getKey()))
|
||||||
|
instance
|
||||||
|
.setHostedby(
|
||||||
|
CfHbKeyValue
|
||||||
|
.newInstance(
|
||||||
|
getEntityId(i.getHostedby().getKey(), ENTITY_ID_SEPARATOR), i.getHostedby().getValue()));
|
||||||
|
|
||||||
return instance;
|
return instance;
|
||||||
|
|
||||||
|
@ -452,16 +576,6 @@ public class ResultMapper implements Serializable {
|
||||||
Constants.COAR_CODE_LABEL_MAP.get(code),
|
Constants.COAR_CODE_LABEL_MAP.get(code),
|
||||||
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
||||||
|
|
||||||
Optional<List<eu.dnetlib.dhp.schema.oaf.Measure>> mes = Optional.ofNullable(i.getMeasures());
|
|
||||||
if (mes.isPresent()) {
|
|
||||||
List<Measure> measure = new ArrayList<>();
|
|
||||||
mes
|
|
||||||
.get()
|
|
||||||
.forEach(
|
|
||||||
m -> m.getUnit().forEach(u -> measure.add(Measure.newInstance(m.getId(), u.getValue()))));
|
|
||||||
instance.setMeasures(measure);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (opAr.get().getOpenAccessRoute() != null) {
|
if (opAr.get().getOpenAccessRoute() != null) {
|
||||||
switch (opAr.get().getOpenAccessRoute()) {
|
switch (opAr.get().getOpenAccessRoute()) {
|
||||||
case hybrid:
|
case hybrid:
|
||||||
|
@ -584,7 +698,9 @@ public class ResultMapper implements Serializable {
|
||||||
if (di.isPresent()) {
|
if (di.isPresent()) {
|
||||||
Provenance p = new Provenance();
|
Provenance p = new Provenance();
|
||||||
p.setProvenance(di.get().getProvenanceaction().getClassname());
|
p.setProvenance(di.get().getProvenanceaction().getClassname());
|
||||||
p.setTrust(di.get().getTrust());
|
if (!s.getQualifier().getClassid().equalsIgnoreCase("fos") &&
|
||||||
|
!s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
|
||||||
|
p.setTrust(di.get().getTrust());
|
||||||
subject.setProvenance(p);
|
subject.setProvenance(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,16 +13,13 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
|
* This class connects with the community APIs for production. It saves the information about the
|
||||||
* context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community
|
* context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community
|
||||||
* - research infrastructure/initiative , the value is the label of the research community - research
|
* - research infrastructure/initiative , the value is the label of the research community - research
|
||||||
* infrastructure/initiative.
|
* infrastructure/initiative.
|
||||||
|
@ -31,11 +28,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
public class SaveCommunityMap implements Serializable {
|
public class SaveCommunityMap implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
|
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
|
||||||
private final transient QueryInformationSystem queryInformationSystem;
|
private final transient UtilCommunityAPI queryInformationSystem;
|
||||||
|
|
||||||
private final transient BufferedWriter writer;
|
private final transient BufferedWriter writer;
|
||||||
|
|
||||||
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
|
public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException {
|
||||||
final Configuration conf = new Configuration();
|
final Configuration conf = new Configuration();
|
||||||
conf.set("fs.defaultFS", hdfsNameNode);
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
FileSystem fileSystem = FileSystem.get(conf);
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
@ -45,8 +42,7 @@ public class SaveCommunityMap implements Serializable {
|
||||||
fileSystem.delete(hdfsWritePath, true);
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
queryInformationSystem = new QueryInformationSystem();
|
queryInformationSystem = new UtilCommunityAPI();
|
||||||
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
|
||||||
|
|
||||||
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
|
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
|
||||||
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
|
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
|
||||||
|
@ -68,9 +64,6 @@ public class SaveCommunityMap implements Serializable {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
|
||||||
log.info("isLookUpUrl: {}", isLookUpUrl);
|
|
||||||
|
|
||||||
final Boolean singleCommunity = Optional
|
final Boolean singleCommunity = Optional
|
||||||
.ofNullable(parser.get("singleDeposition"))
|
.ofNullable(parser.get("singleDeposition"))
|
||||||
.map(Boolean::valueOf)
|
.map(Boolean::valueOf)
|
||||||
|
@ -78,14 +71,14 @@ public class SaveCommunityMap implements Serializable {
|
||||||
|
|
||||||
final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);
|
final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);
|
||||||
|
|
||||||
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
|
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode);
|
||||||
|
|
||||||
scm.saveCommunityMap(singleCommunity, community_id);
|
scm.saveCommunityMap(singleCommunity, community_id);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void saveCommunityMap(boolean singleCommunity, String communityId)
|
private void saveCommunityMap(boolean singleCommunity, String communityId)
|
||||||
throws ISLookUpException, IOException, DocumentException, SAXException {
|
throws IOException {
|
||||||
final String communityMapString = Utils.OBJECT_MAPPER
|
final String communityMapString = Utils.OBJECT_MAPPER
|
||||||
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId));
|
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId));
|
||||||
log.info("communityMap {} ", communityMapString);
|
log.info("communityMap {} ", communityMapString);
|
||||||
|
|
|
@ -3,21 +3,31 @@ package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.*;
|
import org.apache.hadoop.fs.*;
|
||||||
|
import org.apache.http.HttpStatus;
|
||||||
|
import org.joda.time.DateTime;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
|
|
||||||
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||||
|
import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
|
||||||
|
import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;
|
||||||
|
|
||||||
public class SendToZenodoHDFS implements Serializable {
|
public class SendToZenodoHDFS implements Serializable {
|
||||||
|
|
||||||
private static final String NEW = "new"; // to be used for a brand new deposition in zenodo
|
private static final String NEW = "new"; // to be used for a brand new deposition in zenodo
|
||||||
private static final String VERSION = "version"; // to be used to upload a new version of a published deposition
|
private static final String VERSION = "version"; // to be used to upload a new version of a published deposition
|
||||||
private static final String UPDATE = "update"; // to upload content to an open deposition not published
|
private static final String UPDATE = "update"; // to upload content to an open deposition not published
|
||||||
|
private static final Integer NUMBER_OF_RETRIES = 5;
|
||||||
|
private static final Integer DELAY = 10;
|
||||||
|
private static final Integer MULTIPLIER = 5;
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SendToZenodoHDFS.class);
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception, MissingConceptDoiException {
|
public static void main(final String[] args) throws Exception, MissingConceptDoiException {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
@ -79,22 +89,44 @@ public class SendToZenodoHDFS implements Serializable {
|
||||||
|
|
||||||
Path p = fileStatus.getPath();
|
Path p = fileStatus.getPath();
|
||||||
String pString = p.toString();
|
String pString = p.toString();
|
||||||
|
boolean retry = true;
|
||||||
|
int numberOfRetries = 0;
|
||||||
|
|
||||||
if (!pString.endsWith("_SUCCESS")) {
|
if (!pString.endsWith("_SUCCESS")) {
|
||||||
String name = pString.substring(pString.lastIndexOf("/") + 1);
|
String name = pString.substring(pString.lastIndexOf("/") + 1);
|
||||||
|
log.info("Upoloading: {}", name);
|
||||||
FSDataInputStream inputStream = fileSystem.open(p);
|
FSDataInputStream inputStream = fileSystem.open(p);
|
||||||
zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
|
while (retry && numberOfRetries < NUMBER_OF_RETRIES) {
|
||||||
|
int response_code = -1;
|
||||||
|
|
||||||
|
try {
|
||||||
|
response_code = zenodoApiClient
|
||||||
|
.uploadIS3(inputStream, name, fileSystem.getFileStatus(p).getLen());
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.info(e.getMessage());
|
||||||
|
throw new RuntimeException("Error while uploading on Zenodo");
|
||||||
|
}
|
||||||
|
log.info("response code: {}", response_code);
|
||||||
|
if (HttpStatus.SC_OK == response_code || HttpStatus.SC_CREATED == response_code) {
|
||||||
|
retry = false;
|
||||||
|
} else {
|
||||||
|
numberOfRetries += 1;
|
||||||
|
TimeUnit.SECONDS.sleep(DELAY * MULTIPLIER ^ numberOfRetries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (numberOfRetries == NUMBER_OF_RETRIES) {
|
||||||
|
throw new RuntimeException("reached the maximun number or retries to upload on Zenodo");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
// log.info(DateTime.now().toDateTimeISO().toString());
|
||||||
|
TimeUnit.SECONDS.sleep(DELAY);
|
||||||
|
// log.info("Delayed: {}", DateTime.now().toDateTimeISO().toString());
|
||||||
}
|
}
|
||||||
if (!metadata.equals("")) {
|
if (!metadata.equals("")) {
|
||||||
zenodoApiClient.sendMretadata(metadata);
|
zenodoApiClient.sendMretadata(metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Boolean.TRUE.equals(publish)) {
|
|
||||||
zenodoApiClient.publish();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 22/09/23
|
||||||
|
*/
|
||||||
|
public class SparkCopyGraph implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkCopyGraph.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkCopyGraph.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/copygraph_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String hivePath = parser.get("hivePath");
|
||||||
|
log.info("hivePath: {}", hivePath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark ->
|
||||||
|
|
||||||
|
execCopy(
|
||||||
|
spark,
|
||||||
|
hivePath,
|
||||||
|
outputPath));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void execCopy(SparkSession spark, String hivePath, String outputPath) {
|
||||||
|
|
||||||
|
ModelSupport.oafTypes.entrySet().parallelStream().forEach(entry -> {
|
||||||
|
String entityType = entry.getKey();
|
||||||
|
Class<?> clazz = entry.getValue();
|
||||||
|
// if (!entityType.equalsIgnoreCase("relation")) {
|
||||||
|
spark
|
||||||
|
.read()
|
||||||
|
.schema(Encoders.bean(clazz).schema())
|
||||||
|
.parquet(hivePath + "/" + entityType)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/" + entityType);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,203 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.communityapi.model.*;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
public class UtilCommunityAPI {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class);
|
||||||
|
|
||||||
|
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
|
||||||
|
throws IOException {
|
||||||
|
if (singleCommunity)
|
||||||
|
return getMap(Arrays.asList(getCommunity(communityId)));
|
||||||
|
return getMap(getValidCommunities());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private CommunityMap getMap(List<CommunityModel> communities) {
|
||||||
|
final CommunityMap map = new CommunityMap();
|
||||||
|
communities.forEach(c -> map.put(c.getId(), c.getName()));
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getCommunityCsv(List<String> comms) {
|
||||||
|
return comms.stream().map(c -> {
|
||||||
|
try {
|
||||||
|
CommunityModel community = getCommunity(c);
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
builder.append(DHPUtils.md5(community.getId()));
|
||||||
|
builder.append(Constants.SEP);
|
||||||
|
builder.append(community.getName());
|
||||||
|
builder.append(Constants.SEP);
|
||||||
|
builder.append(community.getId());
|
||||||
|
builder.append(Constants.SEP);
|
||||||
|
builder
|
||||||
|
.append(
|
||||||
|
community.getDescription());
|
||||||
|
return builder.toString();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}).collect(Collectors.toList());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<CommunityModel> getValidCommunities() throws IOException {
|
||||||
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
|
return mapper
|
||||||
|
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
|
||||||
|
.stream()
|
||||||
|
.filter(
|
||||||
|
community -> (community.getStatus().equals("all") || community.getStatus().equalsIgnoreCase("public"))
|
||||||
|
&&
|
||||||
|
(community.getType().equals("ri") || community.getType().equals("community")))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private CommunityModel getCommunity(String id) throws IOException {
|
||||||
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
|
return mapper
|
||||||
|
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<ContextInfo> getContextInformation() throws IOException {
|
||||||
|
|
||||||
|
return getValidCommunities()
|
||||||
|
.stream()
|
||||||
|
.map(c -> getContext(c))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public ContextInfo getContext(CommunityModel c) {
|
||||||
|
|
||||||
|
ContextInfo cinfo = new ContextInfo();
|
||||||
|
cinfo.setId(c.getId());
|
||||||
|
cinfo.setDescription(c.getDescription());
|
||||||
|
CommunityModel cm = null;
|
||||||
|
try {
|
||||||
|
cm = getCommunity(c.getId());
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
cinfo.setSubject(new ArrayList<>());
|
||||||
|
cinfo.getSubject().addAll(cm.getSubjects());
|
||||||
|
cinfo.setZenodocommunity(c.getZenodoCommunity());
|
||||||
|
cinfo.setType(c.getType());
|
||||||
|
return cinfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<ContextInfo> getContextRelation() throws IOException {
|
||||||
|
return getValidCommunities().stream().map(c -> {
|
||||||
|
ContextInfo cinfo = new ContextInfo();
|
||||||
|
cinfo.setId(c.getId());
|
||||||
|
cinfo.setDatasourceList(getDatasourceList(c.getId()));
|
||||||
|
cinfo.setProjectList(getProjectList(c.getId()));
|
||||||
|
|
||||||
|
return cinfo;
|
||||||
|
}).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> getDatasourceList(String id) {
|
||||||
|
List<String> datasourceList = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
|
||||||
|
new ObjectMapper()
|
||||||
|
.readValue(
|
||||||
|
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
|
||||||
|
DatasourceList.class)
|
||||||
|
.stream()
|
||||||
|
.forEach(ds -> {
|
||||||
|
if (Optional.ofNullable(ds.getOpenaireId()).isPresent()) {
|
||||||
|
|
||||||
|
datasourceList.add(ds.getOpenaireId());
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
return datasourceList;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> getProjectList(String id) {
|
||||||
|
int page = -1;
|
||||||
|
int size = 100;
|
||||||
|
ContentModel cm = null;
|
||||||
|
;
|
||||||
|
ArrayList<String> projectList = new ArrayList<>();
|
||||||
|
do {
|
||||||
|
page++;
|
||||||
|
try {
|
||||||
|
cm = new ObjectMapper()
|
||||||
|
.readValue(
|
||||||
|
eu.dnetlib.dhp.communityapi.QueryCommunityAPI
|
||||||
|
.communityProjects(
|
||||||
|
id, String.valueOf(page), String.valueOf(size)),
|
||||||
|
ContentModel.class);
|
||||||
|
if (cm.getContent().size() > 0) {
|
||||||
|
cm.getContent().forEach(p -> {
|
||||||
|
if (Optional.ofNullable(p.getOpenaireId()).isPresent())
|
||||||
|
projectList.add(p.getOpenaireId());
|
||||||
|
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
} while (!cm.getLast());
|
||||||
|
|
||||||
|
return projectList;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* it returns for each organization the list of associated communities
|
||||||
|
*/
|
||||||
|
public CommunityEntityMap getCommunityOrganization() throws IOException {
|
||||||
|
CommunityEntityMap organizationMap = new CommunityEntityMap();
|
||||||
|
getValidCommunities()
|
||||||
|
.forEach(community -> {
|
||||||
|
String id = community.getId();
|
||||||
|
try {
|
||||||
|
List<String> associatedOrgs = MAPPER
|
||||||
|
.readValue(
|
||||||
|
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id),
|
||||||
|
OrganizationList.class);
|
||||||
|
associatedOrgs.forEach(o -> {
|
||||||
|
if (!organizationMap
|
||||||
|
.keySet()
|
||||||
|
.contains(o))
|
||||||
|
organizationMap.put(o, new ArrayList<>());
|
||||||
|
organizationMap.get(o).add(community.getId());
|
||||||
|
});
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return organizationMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,9 +1,14 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump;
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Constants.*;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -11,6 +16,7 @@ import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
@ -18,12 +24,20 @@ import com.google.gson.Gson;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.complete.Constants;
|
import eu.dnetlib.dhp.oa.graph.dump.complete.Constants;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Indicator;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Score;
|
||||||
|
import eu.dnetlib.dhp.oa.model.UsageCounts;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.Relation;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Measure;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import scala.Tuple2;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
|
|
||||||
public class Utils {
|
public class Utils {
|
||||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
public static final String ENTITY_ID_SEPARATOR = "|";
|
||||||
|
|
||||||
private Utils() {
|
private Utils() {
|
||||||
}
|
}
|
||||||
|
@ -40,15 +54,11 @@ public class Utils {
|
||||||
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ISLookUpService getIsLookUpService(String isLookUpUrl) {
|
|
||||||
return ISLookupClientFactory.getLookUpService(isLookUpUrl);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String getContextId(String id) {
|
public static String getContextId(String id) {
|
||||||
|
|
||||||
return String
|
return String
|
||||||
.format(
|
.format(
|
||||||
"%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
|
"%s::%s", Constants.CONTEXT_NS_PREFIX,
|
||||||
DHPUtils.md5(id));
|
DHPUtils.md5(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,4 +84,117 @@ public class Utils {
|
||||||
return new Gson().fromJson(sb.toString(), CommunityMap.class);
|
return new Gson().fromJson(sb.toString(), CommunityMap.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String getEntityId(String id, String separator) {
|
||||||
|
return id.substring(id.indexOf(separator) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Dataset<String> getEntitiesId(SparkSession spark, String inputPath) {
|
||||||
|
Dataset<String> dumpedIds = Utils
|
||||||
|
.readPath(spark, inputPath + "/publication", GraphResult.class)
|
||||||
|
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING())
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/dataset", GraphResult.class)
|
||||||
|
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/software", GraphResult.class)
|
||||||
|
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/otherresearchproduct", GraphResult.class)
|
||||||
|
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/organization", eu.dnetlib.dhp.oa.model.graph.Organization.class)
|
||||||
|
.map(
|
||||||
|
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Organization, String>) o -> o.getId(),
|
||||||
|
Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.oa.model.graph.Project.class)
|
||||||
|
.map(
|
||||||
|
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Project, String>) o -> o.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/datasource", eu.dnetlib.dhp.oa.model.graph.Datasource.class)
|
||||||
|
.map(
|
||||||
|
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) o -> o.getId(),
|
||||||
|
Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/communities_infrastructures", ResearchCommunity.class)
|
||||||
|
.map((MapFunction<ResearchCommunity, String>) c -> c.getId(), Encoders.STRING()));
|
||||||
|
return dumpedIds;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Dataset<Relation> getValidRelations(Dataset<Relation> relations,
|
||||||
|
Dataset<String> entitiesIds) {
|
||||||
|
Dataset<Tuple2<String, Relation>> relationSource = relations
|
||||||
|
.map(
|
||||||
|
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getSource(), r),
|
||||||
|
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
|
||||||
|
|
||||||
|
Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
|
||||||
|
.joinWith(entitiesIds, relationSource.col("_1").equalTo(entitiesIds.col("value")))
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
|
||||||
|
t2._1()._2().getTarget(), t2._1()._2()),
|
||||||
|
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
|
||||||
|
|
||||||
|
return relJoinSource
|
||||||
|
.joinWith(entitiesIds, relJoinSource.col("_1").equalTo(entitiesIds.col("value")))
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
|
||||||
|
Encoders.bean(Relation.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Indicator getIndicator(List<Measure> measures) {
|
||||||
|
Indicator i = new Indicator();
|
||||||
|
for (eu.dnetlib.dhp.schema.oaf.Measure m : measures) {
|
||||||
|
switch (m.getId()) {
|
||||||
|
case USAGE_COUNT_DOWNLOADS:
|
||||||
|
getUsageCounts(i).setDownloads(m.getUnit().get(0).getValue());
|
||||||
|
break;
|
||||||
|
case USAGE_COUNT_VIEWS:
|
||||||
|
getUsageCounts(i).setViews(m.getUnit().get(0).getValue());
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
getImpactMeasure(i).add(getScore(m.getId(), m.getUnit()));
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
private static UsageCounts getUsageCounts(Indicator i) {
|
||||||
|
if (i.getUsageCounts() == null) {
|
||||||
|
i.setUsageCounts(new UsageCounts());
|
||||||
|
}
|
||||||
|
return i.getUsageCounts();
|
||||||
|
}
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
private static List<Score> getImpactMeasure(Indicator i) {
|
||||||
|
if (i.getBipIndicators() == null) {
|
||||||
|
i.setBipIndicators(new ArrayList<>());
|
||||||
|
}
|
||||||
|
return i.getBipIndicators();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Score getScore(String indicator, List<KeyValue> unit) {
|
||||||
|
Score s = new Score();
|
||||||
|
s.setIndicator(indicator);
|
||||||
|
for (KeyValue u : unit) {
|
||||||
|
if (u.getKey().equals("score")) {
|
||||||
|
s.setScore(u.getValue());
|
||||||
|
} else {
|
||||||
|
s.setClazz(u.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,10 +9,14 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SaveMode;
|
import org.apache.spark.sql.SaveMode;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
||||||
import eu.dnetlib.dhp.oa.model.community.Context;
|
import eu.dnetlib.dhp.oa.model.community.Context;
|
||||||
|
@ -51,31 +55,21 @@ public class CommunitySplit implements Serializable {
|
||||||
communities
|
communities
|
||||||
.keySet()
|
.keySet()
|
||||||
.stream()
|
.stream()
|
||||||
.forEach(c -> printResult(c, result, outputPath + "/" + communities.get(c).replace(" ", "_")));
|
.parallel()
|
||||||
|
.forEach(c -> {
|
||||||
|
result
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<CommunityResult>) r -> Optional.ofNullable(r.getContext()).isPresent() &&
|
||||||
|
r.getContext().stream().anyMatch(con -> con.getCode().equals(c)))
|
||||||
|
.map(
|
||||||
|
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
|
||||||
|
Encoders.STRING())
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.text(outputPath + "/" + c.replace(" ", "_"));
|
||||||
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
|
|
||||||
Dataset<CommunityResult> communityProducts = result
|
|
||||||
.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));
|
|
||||||
|
|
||||||
communityProducts
|
|
||||||
.write()
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.json(outputPath);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean containsCommunity(CommunityResult r, String c) {
|
|
||||||
if (Optional.ofNullable(r.getContext()).isPresent()) {
|
|
||||||
return r
|
|
||||||
.getContext()
|
|
||||||
.stream()
|
|
||||||
.map(Context::getCode)
|
|
||||||
.collect(Collectors.toList())
|
|
||||||
.contains(c);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,16 +1,36 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Optional;
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
|
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||||
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -46,22 +66,90 @@ public class SparkDumpCommunityProducts implements Serializable {
|
||||||
final String resultClassName = parser.get("resultTableName");
|
final String resultClassName = parser.get("resultTableName");
|
||||||
log.info("resultTableName: {}", resultClassName);
|
log.info("resultTableName: {}", resultClassName);
|
||||||
|
|
||||||
String communityMapPath = parser.get("communityMapPath");
|
String communityMapPath = Optional
|
||||||
|
.ofNullable(parser.get("communityMapPath"))
|
||||||
|
.orElse(null);
|
||||||
|
|
||||||
final String dumpType = Optional
|
String dumpType = Optional
|
||||||
.ofNullable(parser.get("dumpType"))
|
.ofNullable(parser.get("dumpType"))
|
||||||
.map(String::valueOf)
|
.orElse(null);
|
||||||
.orElse("community");
|
|
||||||
|
|
||||||
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||||
|
|
||||||
DumpProducts dump = new DumpProducts();
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
dump
|
runWithSparkSession(
|
||||||
.run(
|
conf,
|
||||||
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class,
|
isSparkSessionManaged,
|
||||||
dumpType);
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
resultDump(
|
||||||
|
spark, inputPath, outputPath, communityMapPath, inputClazz, dumpType);
|
||||||
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
	/**
	 * Reads the OAF entities of type inputClazz from inputPath, maps each one to a
	 * CommunityResult via execMap (records without dataInfo, deleted by inference,
	 * invisible, or outside the community map are mapped to null and filtered out)
	 * and writes the survivors as gzipped JSON text files to outputPath.
	 */
	public static <I extends OafEntity> void resultDump(
		SparkSession spark,
		String inputPath,
		String outputPath,
		String communityMapPath,
		Class<I> inputClazz,
		String dumpType) {

		// the community map is optional: communityMapPath may be null or empty
		CommunityMap communityMap = null;
		if (!StringUtils.isEmpty(communityMapPath))
			communityMap = Utils.getCommunityMap(spark, communityMapPath);

		// effectively-final copy required for capture by the lambda below
		CommunityMap finalCommunityMap = communityMap;
		Utils
			.readPath(spark, inputPath, inputClazz)
			.map(
				(MapFunction<I, CommunityResult>) value -> execMap(value, finalCommunityMap, dumpType),
				Encoders.bean(CommunityResult.class))
			// execMap returns null for records that must not be dumped
			.filter((FilterFunction<CommunityResult>) value -> value != null)
			.map(
				(MapFunction<CommunityResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.text(outputPath);

	}
|
||||||
|
|
||||||
|
	/**
	 * Maps a single OAF entity to its dump representation, or returns null when the
	 * record must be skipped: missing dataInfo, deleted by inference, invisible, or
	 * (when dumpType is empty, i.e. the community dump) not associated to any of the
	 * communities in the map.
	 */
	private static <I extends OafEntity, O extends eu.dnetlib.dhp.oa.model.Result> O execMap(I value,
		CommunityMap communityMap, String dumpType) throws NoAvailableEntityTypeException, CardinalityTooHighException {

		Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
		if (Boolean.FALSE.equals(odInfo.isPresent())) {
			return null;
		}
		if (Boolean.TRUE.equals(odInfo.get().getDeletedbyinference())
			|| Boolean.TRUE.equals(odInfo.get().getInvisible())) {
			return null;
		}
		if (StringUtils.isEmpty(dumpType)) {
			// community dump: keep the record only if at least one of its contexts
			// names a community in the map, either directly or as the prefix of a
			// "community::subcategory" context id
			Set<String> communities = communityMap.keySet();

			Optional<List<Context>> inputContext = Optional
				.ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
			if (!inputContext.isPresent()) {
				return null;
			}
			List<String> toDumpFor = inputContext.get().stream().map(c -> {
				if (communities.contains(c.getId())) {
					return c.getId();
				}
				if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
					return c.getId().substring(0, c.getId().indexOf("::"));
				}
				return null;
			}).filter(Objects::nonNull).collect(Collectors.toList());
			if (toDumpFor.isEmpty()) {
				return null;
			}
		}
		return (O) ResultMapper.map(value, communityMap, Constants.DUMPTYPE.COMMUNITY.getType());

	}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
@ -61,6 +63,13 @@ public class SparkPrepareResultProject implements Serializable {
|
||||||
.orElse(Boolean.TRUE);
|
.orElse(Boolean.TRUE);
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
Boolean substring = Optional
|
||||||
|
.ofNullable(parser.get("substring"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
final String inputPath = parser.get("sourcePath");
|
final String inputPath = parser.get("sourcePath");
|
||||||
log.info("inputPath: {}", inputPath);
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
@ -74,11 +83,12 @@ public class SparkPrepareResultProject implements Serializable {
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
Utils.removeOutputDir(spark, outputPath);
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
prepareResultProjectList(spark, inputPath, outputPath);
|
prepareResultProjectList(spark, inputPath, outputPath, substring);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) {
|
private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
Boolean substring) {
|
||||||
Dataset<Relation> relation = Utils
|
Dataset<Relation> relation = Utils
|
||||||
.readPath(spark, inputPath + "/relation", Relation.class)
|
.readPath(spark, inputPath + "/relation", Relation.class)
|
||||||
.filter(
|
.filter(
|
||||||
|
@ -101,7 +111,10 @@ public class SparkPrepareResultProject implements Serializable {
|
||||||
Set<String> projectSet = new HashSet<>();
|
Set<String> projectSet = new HashSet<>();
|
||||||
Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation> first = it.next();
|
Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation> first = it.next();
|
||||||
ResultProject rp = new ResultProject();
|
ResultProject rp = new ResultProject();
|
||||||
rp.setResultId(s);
|
if (substring)
|
||||||
|
rp.setResultId(getEntityId(s, ENTITY_ID_SEPARATOR));
|
||||||
|
else
|
||||||
|
rp.setResultId(s);
|
||||||
eu.dnetlib.dhp.schema.oaf.Project p = first._1();
|
eu.dnetlib.dhp.schema.oaf.Project p = first._1();
|
||||||
projectSet.add(p.getId());
|
projectSet.add(p.getId());
|
||||||
Project ps = getProject(p, first._2);
|
Project ps = getProject(p, first._2);
|
||||||
|
@ -131,7 +144,7 @@ public class SparkPrepareResultProject implements Serializable {
|
||||||
private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op, Relation relation) {
|
private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op, Relation relation) {
|
||||||
Project p = Project
|
Project p = Project
|
||||||
.newInstance(
|
.newInstance(
|
||||||
op.getId(),
|
getEntityId(op.getId(), ENTITY_ID_SEPARATOR),
|
||||||
op.getCode().getValue(),
|
op.getCode().getValue(),
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(op.getAcronym())
|
.ofNullable(op.getAcronym())
|
||||||
|
|
|
@ -8,6 +8,7 @@ import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.ForeachFunction;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
@ -71,6 +72,7 @@ public class SparkUpdateProjectInfo implements Serializable {
|
||||||
String preparedInfoPath) {
|
String preparedInfoPath) {
|
||||||
Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
|
Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
|
||||||
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
|
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
|
||||||
|
|
||||||
result
|
result
|
||||||
.joinWith(
|
.joinWith(
|
||||||
resultProject, result.col("id").equalTo(resultProject.col("resultId")),
|
resultProject, result.col("id").equalTo(resultProject.col("resultId")),
|
||||||
|
@ -80,10 +82,13 @@ public class SparkUpdateProjectInfo implements Serializable {
|
||||||
Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList()));
|
Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList()));
|
||||||
return r;
|
return r;
|
||||||
}, Encoders.bean(CommunityResult.class))
|
}, Encoders.bean(CommunityResult.class))
|
||||||
|
.map(
|
||||||
|
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
|
||||||
|
Encoders.STRING())
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.mode(SaveMode.Append)
|
.mode(SaveMode.Append)
|
||||||
.json(outputPath);
|
.text(outputPath);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,9 +20,9 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
|
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
|
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
|
||||||
|
@ -52,13 +52,10 @@ public class CreateContextEntities implements Serializable {
|
||||||
final String hdfsNameNode = parser.get("nameNode");
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
log.info("nameNode: {}", hdfsNameNode);
|
log.info("nameNode: {}", hdfsNameNode);
|
||||||
|
|
||||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
|
||||||
log.info("isLookUpUrl: {}", isLookUpUrl);
|
|
||||||
|
|
||||||
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
|
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
|
||||||
|
|
||||||
log.info("Processing contexts...");
|
log.info("Processing contexts...");
|
||||||
cce.execute(Process::getEntity, isLookUpUrl);
|
cce.execute(Process::getEntity);
|
||||||
|
|
||||||
cce.close();
|
cce.close();
|
||||||
|
|
||||||
|
@ -87,15 +84,14 @@ public class CreateContextEntities implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer, String isLookUpUrl)
|
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer)
|
||||||
throws ISLookUpException {
|
throws IOException {
|
||||||
|
|
||||||
QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
|
UtilCommunityAPI queryInformationSystem = new UtilCommunityAPI();
|
||||||
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
|
||||||
|
|
||||||
final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));
|
final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));
|
||||||
|
|
||||||
queryInformationSystem.getContextInformation(consumer);
|
queryInformationSystem.getContextInformation().forEach(ci -> consumer.accept(ci));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected <R extends ResearchInitiative> void writeEntity(final R r) {
|
protected <R extends ResearchInitiative> void writeEntity(final R r) {
|
||||||
|
|
|
@ -1,11 +1,9 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
||||||
|
|
||||||
import java.io.BufferedWriter;
|
import java.io.*;
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.OutputStreamWriter;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
@ -20,13 +18,14 @@ import org.apache.hadoop.fs.Path;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.*;
|
import eu.dnetlib.dhp.oa.model.graph.*;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
|
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
|
||||||
|
@ -36,10 +35,6 @@ public class CreateContextRelation implements Serializable {
|
||||||
private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
|
private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
|
||||||
private final transient Configuration conf;
|
private final transient Configuration conf;
|
||||||
private final transient BufferedWriter writer;
|
private final transient BufferedWriter writer;
|
||||||
private final transient QueryInformationSystem queryInformationSystem;
|
|
||||||
|
|
||||||
private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
|
|
||||||
private static final String CONTEX_RELATION_PROJECT = "projects";
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
|
@ -63,21 +58,14 @@ public class CreateContextRelation implements Serializable {
|
||||||
log.info("hdfsPath: {}", hdfsPath);
|
log.info("hdfsPath: {}", hdfsPath);
|
||||||
|
|
||||||
final String hdfsNameNode = parser.get("nameNode");
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
log.info("nameNode: {}", hdfsNameNode);
|
log.info("hdfsNameNode: {}", hdfsNameNode);
|
||||||
|
|
||||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode);
|
||||||
log.info("isLookUpUrl: {}", isLookUpUrl);
|
|
||||||
|
|
||||||
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);
|
log.info("Creating relation for datasources and projects...");
|
||||||
|
|
||||||
log.info("Creating relation for datasource...");
|
|
||||||
cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class));
|
|
||||||
|
|
||||||
log.info("Creating relations for projects... ");
|
|
||||||
cce
|
cce
|
||||||
.execute(
|
.execute(
|
||||||
Process::getRelation, CONTEX_RELATION_PROJECT,
|
Process::getRelation);
|
||||||
ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
|
|
||||||
|
|
||||||
cce.close();
|
cce.close();
|
||||||
|
|
||||||
|
@ -87,15 +75,11 @@ public class CreateContextRelation implements Serializable {
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
|
public CreateContextRelation(String hdfsPath, String hdfsNameNode)
|
||||||
throws IOException, ISLookUpException {
|
throws IOException {
|
||||||
this.conf = new Configuration();
|
this.conf = new Configuration();
|
||||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
this.conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
queryInformationSystem = new QueryInformationSystem();
|
|
||||||
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
|
||||||
queryInformationSystem.execContextRelationQuery();
|
|
||||||
|
|
||||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
FileSystem fileSystem = FileSystem.get(this.conf);
|
||||||
Path hdfsWritePath = new Path(hdfsPath);
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
FSDataOutputStream fsDataOutputStream = null;
|
||||||
|
@ -109,11 +93,12 @@ public class CreateContextRelation implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix) {
|
public void execute(final Function<ContextInfo, List<Relation>> producer) throws IOException {
|
||||||
|
|
||||||
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);
|
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);
|
||||||
|
|
||||||
queryInformationSystem.getContextRelation(consumer, category, prefix);
|
UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
|
||||||
|
queryCommunityAPI.getContextRelation().forEach(ci -> consumer.accept(ci));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void writeEntity(final Relation r) {
|
protected void writeEntity(final Relation r) {
|
||||||
|
|
|
@ -1,518 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
import org.apache.spark.sql.SaveMode;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.dom4j.Document;
|
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
import org.dom4j.Node;
|
|
||||||
import org.dom4j.io.SAXReader;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
|
||||||
import eu.dnetlib.dhp.oa.model.*;
|
|
||||||
import eu.dnetlib.dhp.oa.model.graph.*;
|
|
||||||
import eu.dnetlib.dhp.oa.model.graph.Funder;
|
|
||||||
import eu.dnetlib.dhp.oa.model.graph.Project;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Dumps of entities in the model defined in eu.dnetlib.dhp.schema.dump.oaf.graph. Results are dumped using the same
|
|
||||||
* Mapper as for eu.dnetlib.dhp.schema.dump.oaf.community, while for the other entities the mapping is defined below
|
|
||||||
*/
|
|
||||||
public class DumpGraphEntities implements Serializable {
|
|
||||||
|
|
||||||
public void run(Boolean isSparkSessionManaged,
|
|
||||||
String inputPath,
|
|
||||||
String outputPath,
|
|
||||||
Class<? extends OafEntity> inputClazz,
|
|
||||||
String communityMapPath) {
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
|
|
||||||
switch (ModelSupport.idPrefixMap.get(inputClazz)) {
|
|
||||||
case "50":
|
|
||||||
DumpProducts d = new DumpProducts();
|
|
||||||
d
|
|
||||||
.run(
|
|
||||||
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, GraphResult.class,
|
|
||||||
eu.dnetlib.dhp.oa.graph.dump.Constants.DUMPTYPE.COMPLETE.getType());
|
|
||||||
break;
|
|
||||||
case "40":
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> {
|
|
||||||
Utils.removeOutputDir(spark, outputPath);
|
|
||||||
projectMap(spark, inputPath, outputPath, inputClazz);
|
|
||||||
|
|
||||||
});
|
|
||||||
break;
|
|
||||||
case "20":
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> {
|
|
||||||
Utils.removeOutputDir(spark, outputPath);
|
|
||||||
organizationMap(spark, inputPath, outputPath, inputClazz);
|
|
||||||
|
|
||||||
});
|
|
||||||
break;
|
|
||||||
case "10":
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> {
|
|
||||||
Utils.removeOutputDir(spark, outputPath);
|
|
||||||
datasourceMap(spark, inputPath, outputPath, inputClazz);
|
|
||||||
|
|
||||||
});
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
|
|
||||||
Class<E> inputClazz) {
|
|
||||||
Utils
|
|
||||||
.readPath(spark, inputPath, inputClazz)
|
|
||||||
.map(
|
|
||||||
(MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d),
|
|
||||||
Encoders.bean(Datasource.class))
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(outputPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
|
|
||||||
Class<E> inputClazz) {
|
|
||||||
Utils
|
|
||||||
.readPath(spark, inputPath, inputClazz)
|
|
||||||
.map(
|
|
||||||
(MapFunction<E, Project>) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p),
|
|
||||||
Encoders.bean(Project.class))
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(outputPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
|
|
||||||
Datasource datasource = new Datasource();
|
|
||||||
|
|
||||||
datasource.setId(d.getId());
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getOriginalId())
|
|
||||||
.ifPresent(
|
|
||||||
oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getPid())
|
|
||||||
.ifPresent(
|
|
||||||
pids -> datasource.setPid(pids
|
|
||||||
.stream()
|
|
||||||
.map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
|
||||||
.collect(Collectors.toList())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getDatasourcetype())
|
|
||||||
.ifPresent(
|
|
||||||
dsType -> datasource
|
|
||||||
.setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getOpenairecompatibility())
|
|
||||||
.ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getOfficialname())
|
|
||||||
.ifPresent(oname -> datasource.setOfficialname(oname.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getEnglishname())
|
|
||||||
.ifPresent(ename -> datasource.setEnglishname(ename.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getWebsiteurl())
|
|
||||||
.ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getLogourl())
|
|
||||||
.ifPresent(lurl -> datasource.setLogourl(lurl.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getDateofvalidation())
|
|
||||||
.ifPresent(dval -> datasource.setDateofvalidation(dval.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getDescription())
|
|
||||||
.ifPresent(dex -> datasource.setDescription(dex.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getSubjects())
|
|
||||||
.ifPresent(
|
|
||||||
sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getOdpolicies())
|
|
||||||
.ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getOdlanguages())
|
|
||||||
.ifPresent(
|
|
||||||
langs -> datasource
|
|
||||||
.setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getOdcontenttypes())
|
|
||||||
.ifPresent(
|
|
||||||
ctypes -> datasource
|
|
||||||
.setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getReleasestartdate())
|
|
||||||
.ifPresent(rd -> datasource.setReleasestartdate(rd.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getReleaseenddate())
|
|
||||||
.ifPresent(ed -> datasource.setReleaseenddate(ed.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getMissionstatementurl())
|
|
||||||
.ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getDatabaseaccesstype())
|
|
||||||
.ifPresent(ar -> datasource.setAccessrights(ar.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getDatauploadtype())
|
|
||||||
.ifPresent(dut -> datasource.setUploadrights(dut.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getDatabaseaccessrestriction())
|
|
||||||
.ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getDatauploadrestriction())
|
|
||||||
.ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getVersioning())
|
|
||||||
.ifPresent(v -> datasource.setVersioning(v.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getCitationguidelineurl())
|
|
||||||
.ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getPidsystems())
|
|
||||||
.ifPresent(ps -> datasource.setPidsystems(ps.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getCertificates())
|
|
||||||
.ifPresent(c -> datasource.setCertificates(c.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getPolicies())
|
|
||||||
.ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getJournal())
|
|
||||||
.ifPresent(j -> datasource.setJournal(getContainer(j)));
|
|
||||||
|
|
||||||
return datasource;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Container getContainer(Journal j) {
|
|
||||||
Container c = new Container();
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getName())
|
|
||||||
.ifPresent(n -> c.setName(n));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getIssnPrinted())
|
|
||||||
.ifPresent(issnp -> c.setIssnPrinted(issnp));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getIssnOnline())
|
|
||||||
.ifPresent(issno -> c.setIssnOnline(issno));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getIssnLinking())
|
|
||||||
.ifPresent(isnl -> c.setIssnLinking(isnl));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getEp())
|
|
||||||
.ifPresent(ep -> c.setEp(ep));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getIss())
|
|
||||||
.ifPresent(iss -> c.setIss(iss));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getSp())
|
|
||||||
.ifPresent(sp -> c.setSp(sp));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getVol())
|
|
||||||
.ifPresent(vol -> c.setVol(vol));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getEdition())
|
|
||||||
.ifPresent(edition -> c.setEdition(edition));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getConferencedate())
|
|
||||||
.ifPresent(cdate -> c.setConferencedate(cdate));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(j.getConferenceplace())
|
|
||||||
.ifPresent(cplace -> c.setConferenceplace(cplace));
|
|
||||||
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
|
|
||||||
Project project = new Project();
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getId())
|
|
||||||
.ifPresent(id -> project.setId(id));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getWebsiteurl())
|
|
||||||
.ifPresent(w -> project.setWebsiteurl(w.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getCode())
|
|
||||||
.ifPresent(code -> project.setCode(code.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getAcronym())
|
|
||||||
.ifPresent(acronynim -> project.setAcronym(acronynim.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getTitle())
|
|
||||||
.ifPresent(title -> project.setTitle(title.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getStartdate())
|
|
||||||
.ifPresent(sdate -> project.setStartdate(sdate.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getEnddate())
|
|
||||||
.ifPresent(edate -> project.setEnddate(edate.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getCallidentifier())
|
|
||||||
.ifPresent(cide -> project.setCallidentifier(cide.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getKeywords())
|
|
||||||
.ifPresent(key -> project.setKeywords(key.getValue()));
|
|
||||||
|
|
||||||
Optional<Field<String>> omandate = Optional.ofNullable(p.getOamandatepublications());
|
|
||||||
Optional<Field<String>> oecsc39 = Optional.ofNullable(p.getEcsc39());
|
|
||||||
boolean mandate = false;
|
|
||||||
if (omandate.isPresent()) {
|
|
||||||
if (omandate.get().getValue().equals("true")) {
|
|
||||||
mandate = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (oecsc39.isPresent()) {
|
|
||||||
if (oecsc39.get().getValue().equals("true")) {
|
|
||||||
mandate = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
project.setOpenaccessmandateforpublications(mandate);
|
|
||||||
project.setOpenaccessmandatefordataset(false);
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getEcarticle29_3())
|
|
||||||
.ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true")));
|
|
||||||
|
|
||||||
project
|
|
||||||
.setSubject(
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getSubjects())
|
|
||||||
.map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
|
|
||||||
.orElse(new ArrayList<>()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getSummary())
|
|
||||||
.ifPresent(summary -> project.setSummary(summary.getValue()));
|
|
||||||
|
|
||||||
Optional<Float> ofundedamount = Optional.ofNullable(p.getFundedamount());
|
|
||||||
Optional<Field<String>> ocurrency = Optional.ofNullable(p.getCurrency());
|
|
||||||
Optional<Float> ototalcost = Optional.ofNullable(p.getTotalcost());
|
|
||||||
|
|
||||||
if (ocurrency.isPresent()) {
|
|
||||||
if (ofundedamount.isPresent()) {
|
|
||||||
if (ototalcost.isPresent()) {
|
|
||||||
project
|
|
||||||
.setGranted(
|
|
||||||
Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get()));
|
|
||||||
} else {
|
|
||||||
project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
project
|
|
||||||
.setH2020programme(
|
|
||||||
Optional
|
|
||||||
.ofNullable(p.getH2020classification())
|
|
||||||
.map(
|
|
||||||
classification -> classification
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
c -> Programme
|
|
||||||
.newInstance(
|
|
||||||
c.getH2020Programme().getCode(), c.getH2020Programme().getDescription()))
|
|
||||||
.collect(Collectors.toList()))
|
|
||||||
.orElse(new ArrayList<>()));
|
|
||||||
|
|
||||||
Optional<List<Field<String>>> ofundTree = Optional
|
|
||||||
.ofNullable(p.getFundingtree());
|
|
||||||
List<Funder> funList = new ArrayList<>();
|
|
||||||
if (ofundTree.isPresent()) {
|
|
||||||
for (Field<String> fundingtree : ofundTree.get()) {
|
|
||||||
funList.add(getFunder(fundingtree.getValue()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
project.setFunding(funList);
|
|
||||||
|
|
||||||
return project;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Funder getFunder(String fundingtree) throws DocumentException {
|
|
||||||
Funder f = new Funder();
|
|
||||||
final Document doc;
|
|
||||||
|
|
||||||
doc = new SAXReader().read(new StringReader(fundingtree));
|
|
||||||
f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
|
||||||
f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
|
|
||||||
f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
|
||||||
// f.setId(((org.dom4j.Node) (doc.selectNodes("//funder/id").get(0))).getText());
|
|
||||||
|
|
||||||
String id = "";
|
|
||||||
String description = "";
|
|
||||||
// List<Levels> fundings = new ArrayList<>();
|
|
||||||
int level = 0;
|
|
||||||
List<org.dom4j.Node> nodes = doc.selectNodes("//funding_level_" + level);
|
|
||||||
while (nodes.size() > 0) {
|
|
||||||
for (org.dom4j.Node n : nodes) {
|
|
||||||
|
|
||||||
List node = n.selectNodes("./id");
|
|
||||||
id = ((org.dom4j.Node) node.get(0)).getText();
|
|
||||||
id = id.substring(id.indexOf("::") + 2);
|
|
||||||
|
|
||||||
node = n.selectNodes("./description");
|
|
||||||
description += ((Node) node.get(0)).getText() + " - ";
|
|
||||||
|
|
||||||
}
|
|
||||||
level += 1;
|
|
||||||
nodes = doc.selectNodes("//funding_level_" + level);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!id.equals("")) {
|
|
||||||
Fundings fundings = new Fundings();
|
|
||||||
fundings.setId(id);
|
|
||||||
fundings.setDescription(description.substring(0, description.length() - 3).trim());
|
|
||||||
f.setFunding_stream(fundings);
|
|
||||||
}
|
|
||||||
|
|
||||||
return f;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
|
|
||||||
Class<E> inputClazz) {
|
|
||||||
Utils
|
|
||||||
.readPath(spark, inputPath, inputClazz)
|
|
||||||
.map(
|
|
||||||
(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
|
|
||||||
Encoders.bean(Organization.class))
|
|
||||||
.filter((FilterFunction<Organization>) o -> o != null)
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(outputPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization(
|
|
||||||
eu.dnetlib.dhp.schema.oaf.Organization org) {
|
|
||||||
if (org.getDataInfo().getDeletedbyinference())
|
|
||||||
return null;
|
|
||||||
Organization organization = new Organization();
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(org.getLegalshortname())
|
|
||||||
.ifPresent(value -> organization.setLegalshortname(value.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(org.getLegalname())
|
|
||||||
.ifPresent(value -> organization.setLegalname(value.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(org.getWebsiteurl())
|
|
||||||
.ifPresent(value -> organization.setWebsiteurl(value.getValue()));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(org.getAlternativeNames())
|
|
||||||
.ifPresent(
|
|
||||||
value -> organization
|
|
||||||
.setAlternativenames(
|
|
||||||
value
|
|
||||||
.stream()
|
|
||||||
.map(v -> v.getValue())
|
|
||||||
.collect(Collectors.toList())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(org.getCountry())
|
|
||||||
.ifPresent(
|
|
||||||
value -> {
|
|
||||||
if (!value.getClassid().equals(Constants.UNKNOWN)) {
|
|
||||||
organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname()));
|
|
||||||
}
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(org.getId())
|
|
||||||
.ifPresent(value -> organization.setId(value));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(org.getPid())
|
|
||||||
.ifPresent(
|
|
||||||
value -> organization
|
|
||||||
.setPid(
|
|
||||||
value
|
|
||||||
.stream()
|
|
||||||
.map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
|
||||||
.collect(Collectors.toList())));
|
|
||||||
|
|
||||||
return organization;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -2,6 +2,8 @@
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
@ -15,7 +17,6 @@ import org.apache.spark.sql.SparkSession;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
import eu.dnetlib.dhp.oa.model.Provenance;
|
import eu.dnetlib.dhp.oa.model.Provenance;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.Node;
|
|
||||||
import eu.dnetlib.dhp.oa.model.graph.RelType;
|
import eu.dnetlib.dhp.oa.model.graph.RelType;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.Relation;
|
import eu.dnetlib.dhp.oa.model.graph.Relation;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
@ -44,7 +45,6 @@ public class Extractor implements Serializable {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
Utils.removeOutputDir(spark, outputPath);
|
|
||||||
extractRelationResult(
|
extractRelationResult(
|
||||||
spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath));
|
spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath));
|
||||||
});
|
});
|
||||||
|
@ -86,7 +86,7 @@ public class Extractor implements Serializable {
|
||||||
.orElse(null))
|
.orElse(null))
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
Relation r = getRelation(
|
Relation r = getRelation(
|
||||||
value.getId(), contextId,
|
getEntityId(value.getId(), ENTITY_ID_SEPARATOR), contextId,
|
||||||
Constants.RESULT_ENTITY,
|
Constants.RESULT_ENTITY,
|
||||||
Constants.CONTEXT_ENTITY,
|
Constants.CONTEXT_ENTITY,
|
||||||
ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP, provenance);
|
ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP, provenance);
|
||||||
|
@ -96,7 +96,7 @@ public class Extractor implements Serializable {
|
||||||
hashCodes.add(r.hashCode());
|
hashCodes.add(r.hashCode());
|
||||||
}
|
}
|
||||||
r = getRelation(
|
r = getRelation(
|
||||||
contextId, value.getId(),
|
contextId, getEntityId(value.getId(), ENTITY_ID_SEPARATOR),
|
||||||
Constants.CONTEXT_ENTITY,
|
Constants.CONTEXT_ENTITY,
|
||||||
Constants.RESULT_ENTITY,
|
Constants.RESULT_ENTITY,
|
||||||
ModelConstants.IS_RELATED_TO,
|
ModelConstants.IS_RELATED_TO,
|
||||||
|
@ -116,7 +116,7 @@ public class Extractor implements Serializable {
|
||||||
}, Encoders.bean(Relation.class))
|
}, Encoders.bean(Relation.class))
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Append)
|
||||||
.json(outputPath);
|
.json(outputPath);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -165,8 +165,8 @@ public class Extractor implements Serializable {
|
||||||
eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
|
eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
|
||||||
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST));
|
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST));
|
||||||
Relation r = getRelation(
|
Relation r = getRelation(
|
||||||
value.getId(),
|
getEntityId(value.getId(), ENTITY_ID_SEPARATOR),
|
||||||
cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
|
getEntityId(cf.getKey(), ENTITY_ID_SEPARATOR), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
|
||||||
resultDatasource, ModelConstants.PROVISION,
|
resultDatasource, ModelConstants.PROVISION,
|
||||||
provenance);
|
provenance);
|
||||||
if (!hashCodes.contains(r.hashCode())) {
|
if (!hashCodes.contains(r.hashCode())) {
|
||||||
|
@ -176,7 +176,7 @@ public class Extractor implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
r = getRelation(
|
r = getRelation(
|
||||||
cf.getKey(), value.getId(),
|
getEntityId(cf.getKey(), ENTITY_ID_SEPARATOR), getEntityId(value.getId(), ENTITY_ID_SEPARATOR),
|
||||||
Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
|
Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
|
||||||
datasourceResult, ModelConstants.PROVISION,
|
datasourceResult, ModelConstants.PROVISION,
|
||||||
provenance);
|
provenance);
|
||||||
|
@ -192,8 +192,10 @@ public class Extractor implements Serializable {
|
||||||
private static Relation getRelation(String source, String target, String sourceType, String targetType,
|
private static Relation getRelation(String source, String target, String sourceType, String targetType,
|
||||||
String relName, String relType, Provenance provenance) {
|
String relName, String relType, Provenance provenance) {
|
||||||
Relation r = new Relation();
|
Relation r = new Relation();
|
||||||
r.setSource(Node.newInstance(source, sourceType));
|
r.setSource(source);
|
||||||
r.setTarget(Node.newInstance(target, targetType));
|
r.setSourceType(sourceType);
|
||||||
|
r.setTarget(target);
|
||||||
|
r.setTargetType(targetType);
|
||||||
r.setReltype(RelType.newInstance(relName, relType));
|
r.setReltype(RelType.newInstance(relName, relType));
|
||||||
r.setProvenance(provenance);
|
r.setProvenance(provenance);
|
||||||
return r;
|
return r;
|
||||||
|
|
|
@ -54,40 +54,11 @@ public class Process implements Serializable {
|
||||||
List<Relation> relationList = new ArrayList<>();
|
List<Relation> relationList = new ArrayList<>();
|
||||||
ci
|
ci
|
||||||
.getDatasourceList()
|
.getDatasourceList()
|
||||||
.forEach(ds -> {
|
.forEach(ds -> relationList.addAll(addRelations(ci, ds, ModelSupport.idPrefixEntity.get("10"))));
|
||||||
|
|
||||||
String nodeType = ModelSupport.idPrefixEntity.get(ds.substring(0, 2));
|
ci
|
||||||
|
.getProjectList()
|
||||||
String contextId = Utils.getContextId(ci.getId());
|
.forEach(p -> relationList.addAll(addRelations(ci, p, ModelSupport.idPrefixEntity.get("40"))));
|
||||||
relationList
|
|
||||||
.add(
|
|
||||||
Relation
|
|
||||||
.newInstance(
|
|
||||||
Node
|
|
||||||
.newInstance(
|
|
||||||
contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY),
|
|
||||||
Node.newInstance(ds, nodeType),
|
|
||||||
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
|
||||||
Provenance
|
|
||||||
.newInstance(
|
|
||||||
Constants.USER_CLAIM,
|
|
||||||
Constants.DEFAULT_TRUST)));
|
|
||||||
|
|
||||||
relationList
|
|
||||||
.add(
|
|
||||||
Relation
|
|
||||||
.newInstance(
|
|
||||||
Node.newInstance(ds, nodeType),
|
|
||||||
Node
|
|
||||||
.newInstance(
|
|
||||||
contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY),
|
|
||||||
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
|
||||||
Provenance
|
|
||||||
.newInstance(
|
|
||||||
Constants.USER_CLAIM,
|
|
||||||
Constants.DEFAULT_TRUST)));
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
return relationList;
|
return relationList;
|
||||||
|
|
||||||
|
@ -96,4 +67,33 @@ public class Process implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static List<Relation> addRelations(ContextInfo ci, String ds, String nodeType) {
|
||||||
|
List<Relation> relationList = new ArrayList<>();
|
||||||
|
String contextId = Utils.getContextId(ci.getId());
|
||||||
|
relationList
|
||||||
|
.add(
|
||||||
|
Relation
|
||||||
|
.newInstance(
|
||||||
|
contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY,
|
||||||
|
ds, nodeType,
|
||||||
|
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
Constants.USER_CLAIM,
|
||||||
|
Constants.DEFAULT_TRUST)));
|
||||||
|
|
||||||
|
relationList
|
||||||
|
.add(
|
||||||
|
Relation
|
||||||
|
.newInstance(
|
||||||
|
ds, nodeType,
|
||||||
|
contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY,
|
||||||
|
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
Constants.USER_CLAIM,
|
||||||
|
Constants.DEFAULT_TRUST)));
|
||||||
|
return relationList;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,198 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
|
||||||
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.function.Consumer;
|
|
||||||
|
|
||||||
import org.dom4j.Document;
|
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
import org.dom4j.Element;
|
|
||||||
import org.dom4j.Node;
|
|
||||||
import org.dom4j.io.SAXReader;
|
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
|
|
||||||
public class QueryInformationSystem {
|
|
||||||
|
|
||||||
private ISLookUpService isLookUp;
|
|
||||||
private List<String> contextRelationResult;
|
|
||||||
|
|
||||||
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
|
||||||
+
|
|
||||||
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
|
||||||
" and $x//context/param[./@name = 'status']/text() = 'all' " +
|
|
||||||
" return " +
|
|
||||||
"$x//context";
|
|
||||||
|
|
||||||
private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
|
||||||
+
|
|
||||||
"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
|
|
||||||
+
|
|
||||||
"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
|
|
||||||
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
|
|
||||||
+
|
|
||||||
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
|
|
||||||
|
|
||||||
public void getContextInformation(final Consumer<ContextInfo> consumer) throws ISLookUpException {
|
|
||||||
|
|
||||||
isLookUp
|
|
||||||
.quickSearchProfile(XQUERY_ENTITY)
|
|
||||||
.forEach(c -> {
|
|
||||||
ContextInfo cinfo = new ContextInfo();
|
|
||||||
String[] cSplit = c.split("@@");
|
|
||||||
cinfo.setId(cSplit[0]);
|
|
||||||
cinfo.setName(cSplit[1]);
|
|
||||||
cinfo.setDescription(cSplit[2]);
|
|
||||||
if (!cSplit[3].trim().equals("")) {
|
|
||||||
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
|
|
||||||
}
|
|
||||||
cinfo.setZenodocommunity(cSplit[4]);
|
|
||||||
cinfo.setType(cSplit[5]);
|
|
||||||
consumer.accept(cinfo);
|
|
||||||
});
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getContextRelationResult() {
|
|
||||||
return contextRelationResult;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setContextRelationResult(List<String> contextRelationResult) {
|
|
||||||
this.contextRelationResult = contextRelationResult;
|
|
||||||
}
|
|
||||||
|
|
||||||
public ISLookUpService getIsLookUp() {
|
|
||||||
return isLookUp;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setIsLookUp(ISLookUpService isLookUpService) {
|
|
||||||
this.isLookUp = isLookUpService;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void execContextRelationQuery() throws ISLookUpException {
|
|
||||||
contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix) {
|
|
||||||
|
|
||||||
contextRelationResult.forEach(xml -> {
|
|
||||||
ContextInfo cinfo = new ContextInfo();
|
|
||||||
final Document doc;
|
|
||||||
|
|
||||||
try {
|
|
||||||
final SAXReader reader = new SAXReader();
|
|
||||||
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
|
||||||
doc = reader.read(new StringReader(xml));
|
|
||||||
Element root = doc.getRootElement();
|
|
||||||
cinfo.setId(root.attributeValue("id"));
|
|
||||||
|
|
||||||
Iterator<Element> it = root.elementIterator();
|
|
||||||
while (it.hasNext()) {
|
|
||||||
Element el = it.next();
|
|
||||||
if (el.getName().equals("category")) {
|
|
||||||
String categoryId = el.attributeValue("id");
|
|
||||||
categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
|
|
||||||
if (categoryId.equals(category)) {
|
|
||||||
cinfo.setDatasourceList(getCategoryList(el, prefix));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
consumer.accept(cinfo);
|
|
||||||
} catch (DocumentException | SAXException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@NotNull
|
|
||||||
private List<String> getCategoryList(Element el, String prefix) {
|
|
||||||
List<String> datasourceList = new ArrayList<>();
|
|
||||||
for (Object node : el.selectNodes(".//concept")) {
|
|
||||||
String oid = getOpenaireId((Node) node, prefix);
|
|
||||||
if (oid != null)
|
|
||||||
datasourceList.add(oid);
|
|
||||||
}
|
|
||||||
|
|
||||||
return datasourceList;
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getOpenaireId(Node el, String prefix) {
|
|
||||||
for (Object node : el.selectNodes(".//param")) {
|
|
||||||
Node n = (Node) node;
|
|
||||||
if (n.valueOf("./@name").equals("openaireId")) {
|
|
||||||
return prefix + "|" + n.getText();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return makeOpenaireId(el, prefix);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private String makeOpenaireId(Node el, String prefix) {
|
|
||||||
if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
String funder = "";
|
|
||||||
String grantId = null;
|
|
||||||
String funding = null;
|
|
||||||
for (Object node : el.selectNodes(".//param")) {
|
|
||||||
Node n = (Node) node;
|
|
||||||
switch (n.valueOf("./@name")) {
|
|
||||||
case "funding":
|
|
||||||
funding = n.getText();
|
|
||||||
break;
|
|
||||||
case "funder":
|
|
||||||
funder = n.getText();
|
|
||||||
break;
|
|
||||||
case "CD_PROJECT_NUMBER":
|
|
||||||
grantId = n.getText();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
String nsp = null;
|
|
||||||
|
|
||||||
switch (funder.toLowerCase()) {
|
|
||||||
case "ec":
|
|
||||||
if (funding == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (funding.toLowerCase().contains("h2020")) {
|
|
||||||
nsp = "corda__h2020::";
|
|
||||||
} else {
|
|
||||||
nsp = "corda_______::";
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case "tubitak":
|
|
||||||
nsp = "tubitakf____::";
|
|
||||||
break;
|
|
||||||
case "dfg":
|
|
||||||
nsp = "dfgf________::";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
StringBuilder bld = new StringBuilder();
|
|
||||||
bld.append(funder.toLowerCase());
|
|
||||||
for (int i = funder.length(); i < 12; i++)
|
|
||||||
bld.append("_");
|
|
||||||
bld.append("::");
|
|
||||||
nsp = bld.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
return prefix + "|" + nsp + DHPUtils.md5(grantId);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -8,16 +8,22 @@ import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SaveMode;
|
import org.apache.spark.sql.SaveMode;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
|
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.Relation;
|
import eu.dnetlib.dhp.oa.model.graph.Relation;
|
||||||
|
import it.unimi.dsi.fastutil.objects.Object2BooleanMap;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads all the entities of the same type (Relation / Results) and saves them in the same folder
|
* Reads all the entities of the same type (Relation / Results) and saves them in the same folder
|
||||||
|
@ -73,10 +79,12 @@ public class SparkCollectAndSave implements Serializable {
|
||||||
.union(Utils.readPath(spark, inputPath + "/result/dataset", GraphResult.class))
|
.union(Utils.readPath(spark, inputPath + "/result/dataset", GraphResult.class))
|
||||||
.union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class))
|
.union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class))
|
||||||
.union(Utils.readPath(spark, inputPath + "/result/software", GraphResult.class))
|
.union(Utils.readPath(spark, inputPath + "/result/software", GraphResult.class))
|
||||||
|
.map(
|
||||||
|
(MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.json(outputPath + "/result");
|
.text(outputPath + "/result");
|
||||||
} else {
|
} else {
|
||||||
write(
|
write(
|
||||||
Utils
|
Utils
|
||||||
|
@ -89,7 +97,7 @@ public class SparkCollectAndSave implements Serializable {
|
||||||
write(
|
write(
|
||||||
Utils
|
Utils
|
||||||
.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class),
|
.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class),
|
||||||
outputPath + "/otheresearchproduct");
|
outputPath + "/otherresearchproduct");
|
||||||
write(
|
write(
|
||||||
Utils
|
Utils
|
||||||
.readPath(spark, inputPath + "/result/software", GraphResult.class),
|
.readPath(spark, inputPath + "/result/software", GraphResult.class),
|
||||||
|
@ -97,26 +105,148 @@ public class SparkCollectAndSave implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dataset<String> dumpedIds = Utils.getEntitiesId(spark, outputPath);
|
||||||
|
|
||||||
|
// Dataset<Relation> relations = Utils
|
||||||
|
// .readPath(spark, inputPath + "/relation/publication", Relation.class)
|
||||||
|
// .union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
|
||||||
|
// .union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
|
||||||
|
// .union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
|
||||||
|
// .union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
|
||||||
|
// .union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
|
||||||
|
// .union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class));
|
||||||
|
|
||||||
|
// Utils.getValidRelations(relations, Utils.getEntitiesId(spark, outputPath))
|
||||||
Utils
|
Utils
|
||||||
.readPath(spark, inputPath + "/relation/publication", Relation.class)
|
.readPath(spark, inputPath + "/relation/publication", Relation.class)
|
||||||
.union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
|
|
||||||
.union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
|
|
||||||
.union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
|
|
||||||
.union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
|
|
||||||
.union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
|
|
||||||
.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class))
|
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(outputPath + "/relation");
|
.json(outputPath + "/relation");
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/relation/dataset", Relation.class)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/relation");
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/relation/orp", Relation.class)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/relation");
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/relation/software", Relation.class)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/relation");
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/relation/contextOrg", Relation.class)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/relation");
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/relation/context", Relation.class)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/relation");
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/relation/relation", Relation.class)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/relation");
|
||||||
|
// relSource(
|
||||||
|
// inputPath, dumpedIds, Utils
|
||||||
|
// .readPath(spark, inputPath + "/relation/publication", Relation.class),
|
||||||
|
// inputPath + "/relSource/publication");
|
||||||
|
// relSource(
|
||||||
|
// inputPath, dumpedIds, Utils
|
||||||
|
// .readPath(spark, inputPath + "/relation/dataset", Relation.class),
|
||||||
|
// inputPath + "/relSource/dataset");
|
||||||
|
// relSource(
|
||||||
|
// inputPath, dumpedIds, Utils
|
||||||
|
// .readPath(spark, inputPath + "/relation/orp", Relation.class),
|
||||||
|
// inputPath + "/relSource/orp");
|
||||||
|
// relSource(
|
||||||
|
// inputPath, dumpedIds, Utils
|
||||||
|
// .readPath(spark, inputPath + "/relation/software", Relation.class),
|
||||||
|
// inputPath + "/relSource/software");
|
||||||
|
// relSource(
|
||||||
|
// inputPath, dumpedIds, Utils
|
||||||
|
// .readPath(spark, inputPath + "/relation/contextOrg", Relation.class),
|
||||||
|
// inputPath + "/relSource/contextOrg");
|
||||||
|
// relSource(
|
||||||
|
// inputPath, dumpedIds, Utils
|
||||||
|
// .readPath(spark, inputPath + "/relation/context", Relation.class),
|
||||||
|
// inputPath + "/relSource/context");
|
||||||
|
// relSource(
|
||||||
|
// inputPath, dumpedIds, Utils
|
||||||
|
// .readPath(spark, inputPath + "/relation/relation", Relation.class),
|
||||||
|
// inputPath + "/relSource/relation");
|
||||||
|
|
||||||
|
// relTarget(
|
||||||
|
// outputPath, dumpedIds, Utils.readPath(spark, inputPath + "/relSource/publication", Relation.class),
|
||||||
|
// SaveMode.Overwrite);
|
||||||
|
// relTarget(
|
||||||
|
// outputPath, dumpedIds, Utils.readPath(spark, inputPath + "/relSource/dataset", Relation.class),
|
||||||
|
// SaveMode.Append);
|
||||||
|
// relTarget(
|
||||||
|
// outputPath, dumpedIds, Utils.readPath(spark, inputPath + "/relSource/orp", Relation.class),
|
||||||
|
// SaveMode.Append);
|
||||||
|
// relTarget(
|
||||||
|
// outputPath, dumpedIds, Utils.readPath(spark, inputPath + "/relSource/software", Relation.class),
|
||||||
|
// SaveMode.Append);
|
||||||
|
// relTarget(
|
||||||
|
// outputPath, dumpedIds, Utils.readPath(spark, inputPath + "/relSource/contextOrg", Relation.class),
|
||||||
|
// SaveMode.Append);
|
||||||
|
// relTarget(
|
||||||
|
// outputPath, dumpedIds, Utils.readPath(spark, inputPath + "/relSource/context", Relation.class),
|
||||||
|
// SaveMode.Append);
|
||||||
|
// relTarget(
|
||||||
|
// outputPath, dumpedIds, Utils.readPath(spark, inputPath + "/relSource/relation", Relation.class),
|
||||||
|
// SaveMode.Append);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void relTarget(String outputPath, Dataset<String> dumpedIds, Dataset<Relation> relJoinSource,
|
||||||
|
SaveMode saveMode) {
|
||||||
|
relJoinSource
|
||||||
|
.joinWith(dumpedIds, relJoinSource.col("target").equalTo(dumpedIds.col("value")))
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(),
|
||||||
|
Encoders.bean(Relation.class))
|
||||||
|
.write()
|
||||||
|
.mode(saveMode)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/relation");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void relSource(String inputPath, Dataset<String> dumpedIds, Dataset<Relation> relations,
|
||||||
|
String outputPath) {
|
||||||
|
|
||||||
|
relations
|
||||||
|
.joinWith(dumpedIds, relations.col("source").equalTo(dumpedIds.col("value")))
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(),
|
||||||
|
Encoders.bean(Relation.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void write(Dataset<GraphResult> dataSet, String outputPath) {
|
private static void write(Dataset<GraphResult> dataSet, String outputPath) {
|
||||||
dataSet
|
dataSet
|
||||||
|
.map((MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.json(outputPath);
|
.text(outputPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,21 +1,60 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Optional;
|
import java.io.StringReader;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.ForeachFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.Node;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.jayway.jsonpath.DocumentContext;
|
||||||
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Container;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Result;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.*;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.Datasource;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.Organization;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.Project;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Spark Job that fires the dump for the entites
|
* Spark Job that fires the dump for the entites
|
||||||
*/
|
*/
|
||||||
public class SparkDumpEntitiesJob implements Serializable {
|
public class SparkDumpEntitiesJob implements Serializable {
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class);
|
||||||
|
public static final String COMPRESSION = "compression";
|
||||||
|
public static final String GZIP = "gzip";
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
|
@ -42,13 +81,559 @@ public class SparkDumpEntitiesJob implements Serializable {
|
||||||
final String resultClassName = parser.get("resultTableName");
|
final String resultClassName = parser.get("resultTableName");
|
||||||
log.info("resultTableName: {}", resultClassName);
|
log.info("resultTableName: {}", resultClassName);
|
||||||
|
|
||||||
final String communityMapPath = parser.get("communityMapPath");
|
Optional<String> communityMap = Optional.ofNullable(parser.get("communityMapPath"));
|
||||||
|
String communityMapPath = null;
|
||||||
|
if (communityMap.isPresent())
|
||||||
|
communityMapPath = communityMap.get();
|
||||||
|
|
||||||
Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);
|
Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);
|
||||||
|
|
||||||
DumpGraphEntities dg = new DumpGraphEntities();
|
run(isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz);
|
||||||
dg.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
|
||||||
|
Class<? extends OafEntity> inputClazz) {
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
switch (ModelSupport.idPrefixMap.get(inputClazz)) {
|
||||||
|
case "50":
|
||||||
|
String finalCommunityMapPath = communityMapPath;
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
resultDump(
|
||||||
|
spark, inputPath, outputPath, finalCommunityMapPath, inputClazz);
|
||||||
|
});
|
||||||
|
|
||||||
|
break;
|
||||||
|
case "40":
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
projectMap(spark, inputPath, outputPath, inputClazz);
|
||||||
|
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
case "20":
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
organizationMap(spark, inputPath, outputPath, inputClazz);
|
||||||
|
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
case "10":
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
datasourceMap(spark, inputPath, outputPath, inputClazz);
|
||||||
|
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <I extends OafEntity> void resultDump(
|
||||||
|
SparkSession spark,
|
||||||
|
String inputPath,
|
||||||
|
String outputPath,
|
||||||
|
String communityMapPath,
|
||||||
|
Class<I> inputClazz) {
|
||||||
|
|
||||||
|
CommunityMap communityMap = null;
|
||||||
|
if (!StringUtils.isEmpty(communityMapPath))
|
||||||
|
communityMap = Utils.getCommunityMap(spark, communityMapPath);
|
||||||
|
|
||||||
|
CommunityMap finalCommunityMap = communityMap;
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.map(
|
||||||
|
(MapFunction<I, GraphResult>) value -> execMap(value, finalCommunityMap),
|
||||||
|
Encoders.bean(GraphResult.class))
|
||||||
|
.filter((FilterFunction<GraphResult>) value -> value != null)
|
||||||
|
.map((MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option(COMPRESSION, GZIP)
|
||||||
|
.text(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <I extends OafEntity, O extends Result> O execMap(I value,
|
||||||
|
CommunityMap communityMap) throws NoAvailableEntityTypeException, CardinalityTooHighException {
|
||||||
|
|
||||||
|
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
|
||||||
|
if (Boolean.FALSE.equals(odInfo.isPresent())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (Boolean.TRUE.equals(odInfo.get().getDeletedbyinference())
|
||||||
|
|| Boolean.TRUE.equals(odInfo.get().getInvisible())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (O) ResultMapper.map(value, communityMap, Constants.DUMPTYPE.COMPLETE.getType());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
Class<E> inputClazz) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.map(
|
||||||
|
(MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d),
|
||||||
|
Encoders.bean(Datasource.class))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option(COMPRESSION, GZIP)
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
Class<E> inputClazz) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.map(
|
||||||
|
(MapFunction<E, Project>) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p),
|
||||||
|
Encoders.bean(Project.class))
|
||||||
|
.filter((FilterFunction<Project>) p -> p != null)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option(COMPRESSION, GZIP)
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
|
||||||
|
if (Boolean.TRUE.equals(d.getDataInfo().getDeletedbyinference()))
|
||||||
|
return null;
|
||||||
|
Datasource datasource = new Datasource();
|
||||||
|
|
||||||
|
datasource.setId(getEntityId(d.getId(), ENTITY_ID_SEPARATOR));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOriginalId())
|
||||||
|
.ifPresent(
|
||||||
|
oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getPid())
|
||||||
|
.ifPresent(
|
||||||
|
pids -> datasource
|
||||||
|
.setPid(
|
||||||
|
pids
|
||||||
|
.stream()
|
||||||
|
.map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatasourcetype())
|
||||||
|
.ifPresent(
|
||||||
|
dsType -> datasource
|
||||||
|
.setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOpenairecompatibility())
|
||||||
|
.ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOfficialname())
|
||||||
|
.ifPresent(oname -> datasource.setOfficialname(oname.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getEnglishname())
|
||||||
|
.ifPresent(ename -> datasource.setEnglishname(ename.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getWebsiteurl())
|
||||||
|
.ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getLogourl())
|
||||||
|
.ifPresent(lurl -> datasource.setLogourl(lurl.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDateofvalidation())
|
||||||
|
.ifPresent(dval -> datasource.setDateofvalidation(dval.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDescription())
|
||||||
|
.ifPresent(dex -> datasource.setDescription(dex.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getSubjects())
|
||||||
|
.ifPresent(
|
||||||
|
sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOdpolicies())
|
||||||
|
.ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOdlanguages())
|
||||||
|
.ifPresent(
|
||||||
|
langs -> datasource
|
||||||
|
.setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOdcontenttypes())
|
||||||
|
.ifPresent(
|
||||||
|
ctypes -> datasource
|
||||||
|
.setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getReleasestartdate())
|
||||||
|
.ifPresent(rd -> datasource.setReleasestartdate(rd.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getReleaseenddate())
|
||||||
|
.ifPresent(ed -> datasource.setReleaseenddate(ed.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getMissionstatementurl())
|
||||||
|
.ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatabaseaccesstype())
|
||||||
|
.ifPresent(ar -> datasource.setAccessrights(ar.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatauploadtype())
|
||||||
|
.ifPresent(dut -> datasource.setUploadrights(dut.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatabaseaccessrestriction())
|
||||||
|
.ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatauploadrestriction())
|
||||||
|
.ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getVersioning())
|
||||||
|
.ifPresent(v -> datasource.setVersioning(v.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getCitationguidelineurl())
|
||||||
|
.ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getPidsystems())
|
||||||
|
.ifPresent(ps -> datasource.setPidsystems(ps.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getCertificates())
|
||||||
|
.ifPresent(c -> datasource.setCertificates(c.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getPolicies())
|
||||||
|
.ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getJournal())
|
||||||
|
.ifPresent(j -> datasource.setJournal(getContainer(j)));
|
||||||
|
|
||||||
|
// Optional
|
||||||
|
// .ofNullable(d.getMeasures())
|
||||||
|
// .ifPresent(m -> datasource.setIndicators(Utils.getIndicator(d.getMeasures())));
|
||||||
|
|
||||||
|
return datasource;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Container getContainer(Journal j) {
|
||||||
|
Container c = new Container();
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getName())
|
||||||
|
.ifPresent(n -> c.setName(n));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getIssnPrinted())
|
||||||
|
.ifPresent(issnp -> c.setIssnPrinted(issnp));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getIssnOnline())
|
||||||
|
.ifPresent(issno -> c.setIssnOnline(issno));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getIssnLinking())
|
||||||
|
.ifPresent(isnl -> c.setIssnLinking(isnl));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getEp())
|
||||||
|
.ifPresent(ep -> c.setEp(ep));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getIss())
|
||||||
|
.ifPresent(iss -> c.setIss(iss));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getSp())
|
||||||
|
.ifPresent(sp -> c.setSp(sp));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getVol())
|
||||||
|
.ifPresent(vol -> c.setVol(vol));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getEdition())
|
||||||
|
.ifPresent(edition -> c.setEdition(edition));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getConferencedate())
|
||||||
|
.ifPresent(cdate -> c.setConferencedate(cdate));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getConferenceplace())
|
||||||
|
.ifPresent(cplace -> c.setConferenceplace(cplace));
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
|
||||||
|
if (Boolean.TRUE.equals(p.getDataInfo().getDeletedbyinference()))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
Project project = new Project();
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getId())
|
||||||
|
.ifPresent(id -> project.setId(getEntityId(id, ENTITY_ID_SEPARATOR)));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getWebsiteurl())
|
||||||
|
.ifPresent(w -> project.setWebsiteurl(w.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getCode())
|
||||||
|
.ifPresent(code -> project.setCode(code.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getAcronym())
|
||||||
|
.ifPresent(acronynim -> project.setAcronym(acronynim.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getTitle())
|
||||||
|
.ifPresent(title -> project.setTitle(title.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getStartdate())
|
||||||
|
.ifPresent(sdate -> project.setStartdate(sdate.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getEnddate())
|
||||||
|
.ifPresent(edate -> project.setEnddate(edate.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getCallidentifier())
|
||||||
|
.ifPresent(cide -> project.setCallidentifier(cide.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getKeywords())
|
||||||
|
.ifPresent(key -> project.setKeywords(key.getValue()));
|
||||||
|
|
||||||
|
Optional<Field<String>> omandate = Optional.ofNullable(p.getOamandatepublications());
|
||||||
|
Optional<Field<String>> oecsc39 = Optional.ofNullable(p.getEcsc39());
|
||||||
|
boolean mandate = false;
|
||||||
|
if (omandate.isPresent()) {
|
||||||
|
if (omandate.get().getValue().equals("true")) {
|
||||||
|
mandate = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (oecsc39.isPresent()) {
|
||||||
|
if (oecsc39.get().getValue().equals("true")) {
|
||||||
|
mandate = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
project.setOpenaccessmandateforpublications(mandate);
|
||||||
|
project.setOpenaccessmandatefordataset(false);
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getEcarticle29_3())
|
||||||
|
.ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true")));
|
||||||
|
|
||||||
|
project
|
||||||
|
.setSubject(
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getSubjects())
|
||||||
|
.map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
|
||||||
|
.orElse(new ArrayList<>()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getSummary())
|
||||||
|
.ifPresent(summary -> project.setSummary(summary.getValue()));
|
||||||
|
|
||||||
|
Optional<Float> ofundedamount = Optional.ofNullable(p.getFundedamount());
|
||||||
|
Optional<Field<String>> ocurrency = Optional.ofNullable(p.getCurrency());
|
||||||
|
Optional<Float> ototalcost = Optional.ofNullable(p.getTotalcost());
|
||||||
|
|
||||||
|
if (ocurrency.isPresent()) {
|
||||||
|
if (ofundedamount.isPresent()) {
|
||||||
|
if (ototalcost.isPresent()) {
|
||||||
|
project
|
||||||
|
.setGranted(
|
||||||
|
Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get()));
|
||||||
|
} else {
|
||||||
|
project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
project
|
||||||
|
.setH2020programme(
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getH2020classification())
|
||||||
|
.map(
|
||||||
|
classification -> classification
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
c -> Programme
|
||||||
|
.newInstance(
|
||||||
|
c.getH2020Programme().getCode(), c.getH2020Programme().getDescription()))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(new ArrayList<>()));
|
||||||
|
|
||||||
|
Optional<List<Field<String>>> ofundTree = Optional
|
||||||
|
.ofNullable(p.getFundingtree());
|
||||||
|
List<Funder> funList = new ArrayList<>();
|
||||||
|
if (ofundTree.isPresent()) {
|
||||||
|
for (Field<String> fundingtree : ofundTree.get()) {
|
||||||
|
funList.add(getFunder(fundingtree.getValue()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
project.setFunding(funList);
|
||||||
|
|
||||||
|
// if (Optional.ofNullable(p.getMeasures()).isPresent()) {
|
||||||
|
// project.setIndicators(Utils.getIndicator(p.getMeasures()));
|
||||||
|
// }
|
||||||
|
return project;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Funder getFunder(String fundingtree) throws DocumentException {
|
||||||
|
Funder f = new Funder();
|
||||||
|
final Document doc;
|
||||||
|
|
||||||
|
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||||
|
f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
||||||
|
f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
|
||||||
|
f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||||
|
|
||||||
|
String id = "";
|
||||||
|
|
||||||
|
StringBuilder bld = new StringBuilder();
|
||||||
|
|
||||||
|
int level = 0;
|
||||||
|
List<org.dom4j.Node> nodes = doc.selectNodes("//funding_level_" + level);
|
||||||
|
while (!nodes.isEmpty()) {
|
||||||
|
for (org.dom4j.Node n : nodes) {
|
||||||
|
|
||||||
|
List node = n.selectNodes("./id");
|
||||||
|
id = ((org.dom4j.Node) node.get(0)).getText();
|
||||||
|
id = id.substring(id.indexOf("::") + 2);
|
||||||
|
|
||||||
|
node = n.selectNodes("./description");
|
||||||
|
bld.append(((Node) node.get(0)).getText() + " - ");
|
||||||
|
|
||||||
|
}
|
||||||
|
level += 1;
|
||||||
|
nodes = doc.selectNodes("//funding_level_" + level);
|
||||||
|
}
|
||||||
|
String description = bld.toString();
|
||||||
|
if (!id.equals("")) {
|
||||||
|
Fundings fundings = new Fundings();
|
||||||
|
fundings.setId(id);
|
||||||
|
fundings.setDescription(description.substring(0, description.length() - 3).trim());
|
||||||
|
f.setFunding_stream(fundings);
|
||||||
|
}
|
||||||
|
|
||||||
|
return f;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
Class<E> inputClazz) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.map(
|
||||||
|
(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
|
||||||
|
Encoders.bean(Organization.class))
|
||||||
|
.filter((FilterFunction<Organization>) o -> o != null)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option(COMPRESSION, GZIP)
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization(
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Organization org) {
|
||||||
|
if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference()))
|
||||||
|
return null;
|
||||||
|
if (!Optional.ofNullable(org.getLegalname()).isPresent()
|
||||||
|
&& !Optional.ofNullable(org.getLegalshortname()).isPresent())
|
||||||
|
return null;
|
||||||
|
|
||||||
|
Organization organization = new Organization();
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getLegalshortname())
|
||||||
|
.ifPresent(value -> organization.setLegalshortname(value.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getLegalname())
|
||||||
|
.ifPresent(value -> organization.setLegalname(value.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getWebsiteurl())
|
||||||
|
.ifPresent(value -> organization.setWebsiteurl(value.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getAlternativeNames())
|
||||||
|
.ifPresent(
|
||||||
|
value -> organization
|
||||||
|
.setAlternativenames(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(v -> v.getValue())
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getCountry())
|
||||||
|
.ifPresent(
|
||||||
|
value -> {
|
||||||
|
if (!value.getClassid().equals(eu.dnetlib.dhp.oa.graph.dump.complete.Constants.UNKNOWN)) {
|
||||||
|
organization
|
||||||
|
.setCountry(
|
||||||
|
eu.dnetlib.dhp.oa.model.Country.newInstance(value.getClassid(), value.getClassname()));
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getId())
|
||||||
|
.ifPresent(value -> organization.setId(getEntityId(value, ENTITY_ID_SEPARATOR)));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getPid())
|
||||||
|
.ifPresent(
|
||||||
|
value -> organization
|
||||||
|
.setPid(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
|
return organization;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -23,7 +25,6 @@ import org.slf4j.LoggerFactory;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.model.Provenance;
|
import eu.dnetlib.dhp.oa.model.Provenance;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.Node;
|
|
||||||
import eu.dnetlib.dhp.oa.model.graph.RelType;
|
import eu.dnetlib.dhp.oa.model.graph.RelType;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
@ -80,22 +81,18 @@ public class SparkDumpRelationJob implements Serializable {
|
||||||
private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set<String> removeSet) {
|
private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set<String> removeSet) {
|
||||||
Dataset<Relation> relations = Utils.readPath(spark, inputPath, Relation.class);
|
Dataset<Relation> relations = Utils.readPath(spark, inputPath, Relation.class);
|
||||||
relations
|
relations
|
||||||
.filter((FilterFunction<Relation>) r -> !removeSet.contains(r.getRelClass()))
|
.filter(
|
||||||
|
(FilterFunction<Relation>) r -> !removeSet.contains(r.getRelClass())
|
||||||
|
&& !r.getSubRelType().equalsIgnoreCase("resultService"))
|
||||||
.map((MapFunction<Relation, eu.dnetlib.dhp.oa.model.graph.Relation>) relation -> {
|
.map((MapFunction<Relation, eu.dnetlib.dhp.oa.model.graph.Relation>) relation -> {
|
||||||
eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation();
|
eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation();
|
||||||
relNew
|
relNew
|
||||||
.setSource(
|
.setSource(getEntityId(relation.getSource(), ENTITY_ID_SEPARATOR));
|
||||||
Node
|
relNew.setSourceType(ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2)));
|
||||||
.newInstance(
|
|
||||||
relation.getSource(),
|
|
||||||
ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));
|
|
||||||
|
|
||||||
relNew
|
relNew
|
||||||
.setTarget(
|
.setTarget(getEntityId(relation.getTarget(), ENTITY_ID_SEPARATOR));
|
||||||
Node
|
relNew.setTargetType(ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2)));
|
||||||
.newInstance(
|
|
||||||
relation.getTarget(),
|
|
||||||
ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));
|
|
||||||
|
|
||||||
relNew
|
relNew
|
||||||
.setReltype(
|
.setReltype(
|
||||||
|
@ -127,7 +124,7 @@ public class SparkDumpRelationJob implements Serializable {
|
||||||
}, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
|
}, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Append)
|
||||||
.json(outputPath);
|
.json(outputPath);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,9 @@
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelSupport.idPrefixMap;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -21,13 +24,16 @@ import org.slf4j.LoggerFactory;
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
import eu.dnetlib.dhp.oa.model.Provenance;
|
import eu.dnetlib.dhp.oa.model.Provenance;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.Node;
|
|
||||||
import eu.dnetlib.dhp.oa.model.graph.RelType;
|
import eu.dnetlib.dhp.oa.model.graph.RelType;
|
||||||
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -59,8 +65,9 @@ public class SparkOrganizationRelation implements Serializable {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final OrganizationMap organizationMap = new Gson()
|
UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
|
||||||
.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
|
final CommunityEntityMap organizationMap = queryCommunityAPI.getCommunityOrganization();
|
||||||
|
|
||||||
final String serializedOrganizationMap = new Gson().toJson(organizationMap);
|
final String serializedOrganizationMap = new Gson().toJson(organizationMap);
|
||||||
log.info("organization map : {}", serializedOrganizationMap);
|
log.info("organization map : {}", serializedOrganizationMap);
|
||||||
|
|
||||||
|
@ -80,7 +87,7 @@ public class SparkOrganizationRelation implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
|
private static void extractRelation(SparkSession spark, String inputPath, CommunityEntityMap organizationMap,
|
||||||
String outputPath, String communityMapPath) {
|
String outputPath, String communityMapPath) {
|
||||||
|
|
||||||
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
|
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
|
||||||
|
@ -101,7 +108,7 @@ public class SparkOrganizationRelation implements Serializable {
|
||||||
.as(Encoders.bean(MergedRels.class));
|
.as(Encoders.bean(MergedRels.class));
|
||||||
|
|
||||||
mergedRelsDataset.map((MapFunction<MergedRels, MergedRels>) mergedRels -> {
|
mergedRelsDataset.map((MapFunction<MergedRels, MergedRels>) mergedRels -> {
|
||||||
if (organizationMap.containsKey(mergedRels.getOrganizationId())) {
|
if (organizationMap.containsKey(getEntityId(mergedRels.getOrganizationId(), ENTITY_ID_SEPARATOR))) {
|
||||||
return mergedRels;
|
return mergedRels;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
@ -130,15 +137,16 @@ public class SparkOrganizationRelation implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@NotNull
|
@NotNull
|
||||||
private static Consumer<MergedRels> getMergedRelsConsumer(OrganizationMap organizationMap,
|
private static Consumer<MergedRels> getMergedRelsConsumer(CommunityEntityMap organizationMap,
|
||||||
List<eu.dnetlib.dhp.oa.model.graph.Relation> relList, CommunityMap communityMap) {
|
List<eu.dnetlib.dhp.oa.model.graph.Relation> relList, CommunityMap communityMap) {
|
||||||
return mergedRels -> {
|
return mergedRels -> {
|
||||||
String oId = mergedRels.getOrganizationId();
|
String oId = getEntityId(mergedRels.getOrganizationId(), ENTITY_ID_SEPARATOR);
|
||||||
organizationMap
|
organizationMap
|
||||||
.get(oId)
|
.get(oId)
|
||||||
.forEach(community -> {
|
.forEach(community -> {
|
||||||
if (communityMap.containsKey(community)) {
|
if (communityMap.containsKey(community)) {
|
||||||
addRelations(relList, community, mergedRels.getRepresentativeId());
|
addRelations(
|
||||||
|
relList, community, getEntityId(mergedRels.getRepresentativeId(), ENTITY_ID_SEPARATOR));
|
||||||
}
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
|
@ -155,8 +163,9 @@ public class SparkOrganizationRelation implements Serializable {
|
||||||
.add(
|
.add(
|
||||||
eu.dnetlib.dhp.oa.model.graph.Relation
|
eu.dnetlib.dhp.oa.model.graph.Relation
|
||||||
.newInstance(
|
.newInstance(
|
||||||
Node.newInstance(id, Constants.CONTEXT_ENTITY),
|
id, Constants.CONTEXT_ENTITY,
|
||||||
Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
|
organization,
|
||||||
|
ModelSupport.idPrefixEntity.get(idPrefixMap.get(Organization.class)),
|
||||||
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||||
Provenance
|
Provenance
|
||||||
.newInstance(
|
.newInstance(
|
||||||
|
@ -167,8 +176,8 @@ public class SparkOrganizationRelation implements Serializable {
|
||||||
.add(
|
.add(
|
||||||
eu.dnetlib.dhp.oa.model.graph.Relation
|
eu.dnetlib.dhp.oa.model.graph.Relation
|
||||||
.newInstance(
|
.newInstance(
|
||||||
Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
|
organization, ModelSupport.idPrefixEntity.get(idPrefixMap.get(Organization.class)),
|
||||||
Node.newInstance(id, Constants.CONTEXT_ENTITY),
|
id, Constants.CONTEXT_ENTITY,
|
||||||
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||||
Provenance
|
Provenance
|
||||||
.newInstance(
|
.newInstance(
|
||||||
|
|
|
@ -4,14 +4,15 @@ package eu.dnetlib.dhp.oa.graph.dump.complete;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.*;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.types.*;
|
||||||
import org.apache.spark.sql.SaveMode;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -59,78 +60,42 @@ public class SparkSelectValidRelationsJob implements Serializable {
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
Utils.removeOutputDir(spark, outputPath);
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
selectValidRelation(spark, inputPath, outputPath);
|
selectValidRelation2(spark, inputPath, outputPath);
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void selectValidRelation(SparkSession spark, String inputPath, String outputPath) {
|
private static void selectValidRelation2(SparkSession spark, String inputPath, String outputPath) {
|
||||||
Dataset<Relation> relation = Utils.readPath(spark, inputPath + "/relation", Relation.class);
|
final StructType structureSchema = new StructType()
|
||||||
Dataset<Publication> publication = Utils.readPath(spark, inputPath + "/publication", Publication.class);
|
.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");
|
||||||
Dataset<eu.dnetlib.dhp.schema.oaf.Dataset> dataset = Utils
|
|
||||||
.readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class);
|
|
||||||
Dataset<Software> software = Utils.readPath(spark, inputPath + "/software", Software.class);
|
|
||||||
Dataset<OtherResearchProduct> other = Utils
|
|
||||||
.readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class);
|
|
||||||
Dataset<Organization> organization = Utils.readPath(spark, inputPath + "/organization", Organization.class);
|
|
||||||
Dataset<Project> project = Utils.readPath(spark, inputPath + "/project", Project.class);
|
|
||||||
Dataset<Datasource> datasource = Utils.readPath(spark, inputPath + "/datasource", Datasource.class);
|
|
||||||
|
|
||||||
relation.createOrReplaceTempView("relation");
|
org.apache.spark.sql.Dataset<Row> df = spark.createDataFrame(new ArrayList<Row>(), structureSchema);
|
||||||
publication.createOrReplaceTempView("publication");
|
List<String> entities = Arrays
|
||||||
dataset.createOrReplaceTempView("dataset");
|
.asList(
|
||||||
other.createOrReplaceTempView("other");
|
"publication", "dataset", "otherresearchproduct", "software", "organization", "project", "datasource");
|
||||||
software.createOrReplaceTempView("software");
|
for (String e : entities)
|
||||||
organization.createOrReplaceTempView("organization");
|
df = df
|
||||||
project.createOrReplaceTempView("project");
|
.union(
|
||||||
datasource.createOrReplaceTempView("datasource");
|
spark
|
||||||
|
.read()
|
||||||
|
.schema(structureSchema)
|
||||||
|
.json(inputPath + "/" + e)
|
||||||
|
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true"));
|
||||||
|
|
||||||
spark
|
org.apache.spark.sql.Dataset<Row> relations = spark
|
||||||
.sql(
|
.read()
|
||||||
"SELECT id " +
|
.schema(Encoders.bean(Relation.class).schema())
|
||||||
"FROM publication " +
|
.json(inputPath + "/relation")
|
||||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
.filter("dataInfo.deletedbyinference == false");
|
||||||
"UNION ALL " +
|
|
||||||
"SELECT id " +
|
|
||||||
"FROM dataset " +
|
|
||||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
|
||||||
"UNION ALL " +
|
|
||||||
"SELECT id " +
|
|
||||||
"FROM other " +
|
|
||||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
|
||||||
"UNION ALL " +
|
|
||||||
"SELECT id " +
|
|
||||||
"FROM software " +
|
|
||||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
|
||||||
"UNION ALL " +
|
|
||||||
"SELECT id " +
|
|
||||||
"FROM organization " +
|
|
||||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
|
||||||
"UNION ALL " +
|
|
||||||
"SELECT id " +
|
|
||||||
"FROM project " +
|
|
||||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
|
||||||
"UNION ALL " +
|
|
||||||
"SELECT id " +
|
|
||||||
"FROM datasource " +
|
|
||||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false ")
|
|
||||||
.createOrReplaceTempView("identifiers");
|
|
||||||
|
|
||||||
spark
|
relations
|
||||||
.sql(
|
.join(df, relations.col("source").equalTo(df.col("id")), "leftsemi")
|
||||||
"SELECT relation.* " +
|
.join(df, relations.col("target").equalTo(df.col("id")), "leftsemi")
|
||||||
"FROM relation " +
|
|
||||||
"JOIN identifiers i1 " +
|
|
||||||
"ON source = i1.id " +
|
|
||||||
"JOIN identifiers i2 " +
|
|
||||||
"ON target = i2.id " +
|
|
||||||
"WHERE datainfo.deletedbyinference = false")
|
|
||||||
.as(Encoders.bean(Relation.class))
|
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
.json(outputPath);
|
.json(outputPath);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,118 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.country;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 27/04/23
|
||||||
|
* Selects the results having in the country the given country
|
||||||
|
*/
|
||||||
|
public class SparkFindResultWithCountry implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkFindResultWithCountry.class);
|
||||||
|
|
||||||
|
public static final String COMPRESSION = "compression";
|
||||||
|
public static final String GZIP = "gzip";
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkFindResultWithCountry.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/result_country_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String resultType = parser.get("resultType");
|
||||||
|
log.info("resultType: {}", resultType);
|
||||||
|
|
||||||
|
final String resultClassName = parser.get("resultTableName");
|
||||||
|
log.info("resultTableName: {}", resultClassName);
|
||||||
|
|
||||||
|
final String preparedInfoPath = parser.get("resultWithCountry");
|
||||||
|
|
||||||
|
Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz = (Class<? extends eu.dnetlib.dhp.schema.oaf.Result>) Class
|
||||||
|
.forName(resultClassName);
|
||||||
|
|
||||||
|
run(
|
||||||
|
isSparkSessionManaged, inputPath, outputPath, inputClazz,
|
||||||
|
resultType, preparedInfoPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath,
|
||||||
|
|
||||||
|
Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz, String resultType, String preparedInfoPath) {
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath + "/original/" + resultType);
|
||||||
|
|
||||||
|
resultDump(
|
||||||
|
spark, inputPath, outputPath, inputClazz, resultType, preparedInfoPath);
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <I extends eu.dnetlib.dhp.schema.oaf.Result> void resultDump(
|
||||||
|
SparkSession spark,
|
||||||
|
String inputPath,
|
||||||
|
String outputPath,
|
||||||
|
Class<I> inputClazz,
|
||||||
|
|
||||||
|
String resultType,
|
||||||
|
|
||||||
|
String preparedInfoPath) {
|
||||||
|
|
||||||
|
Dataset<String> resultsWithCountry = spark.read().textFile(preparedInfoPath).distinct();
|
||||||
|
|
||||||
|
Dataset<I> result = Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<I>) r -> !r.getDataInfo().getInvisible() && !r.getDataInfo().getDeletedbyinference());
|
||||||
|
|
||||||
|
resultsWithCountry
|
||||||
|
.joinWith(result, resultsWithCountry.col("value").equalTo(result.col("id")))
|
||||||
|
.map((MapFunction<Tuple2<String, I>, I>) t2 -> t2._2(), Encoders.bean(inputClazz))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/original/" + resultType);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,173 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.country;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.Node;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 27/04/23
|
||||||
|
* Finds the results id which are in relation with another entity having the given country
|
||||||
|
* or that have that country in the country list
|
||||||
|
*/
|
||||||
|
public class SparkFindResultsRelatedToCountry implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkFindResultsRelatedToCountry.class);
|
||||||
|
|
||||||
|
public static final String COMPRESSION = "compression";
|
||||||
|
public static final String GZIP = "gzip";
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkFindResultsRelatedToCountry.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/result_related_country_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String country = parser.get("country");
|
||||||
|
|
||||||
|
run(
|
||||||
|
isSparkSessionManaged, inputPath, outputPath, country);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath,
|
||||||
|
String country) {
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
findRelatedEntities(
|
||||||
|
spark, inputPath, outputPath, country);
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <I extends eu.dnetlib.dhp.schema.oaf.Result> void findRelatedEntities(
|
||||||
|
SparkSession spark,
|
||||||
|
String inputPath,
|
||||||
|
String outputPath,
|
||||||
|
String country) {
|
||||||
|
|
||||||
|
Dataset<Project> projectsInCountry = Utils
|
||||||
|
.readPath(spark, inputPath + "/project", Project.class)
|
||||||
|
.filter((FilterFunction<Project>) p -> isCountryInFunderJurisdiction(p.getFundingtree(), country));
|
||||||
|
|
||||||
|
Dataset<Relation> relsProjectResults = Utils
|
||||||
|
.readPath(spark, inputPath + "/relation", Relation.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||||
|
r.getRelClass().equals(ModelConstants.PRODUCES));
|
||||||
|
|
||||||
|
projectsInCountry
|
||||||
|
.joinWith(relsProjectResults, projectsInCountry.col("id").equalTo(relsProjectResults.col("source")))
|
||||||
|
.map((MapFunction<Tuple2<Project, Relation>, String>) t2 -> t2._2().getTarget(), Encoders.STRING())
|
||||||
|
.write()
|
||||||
|
.option(COMPRESSION, GZIP)
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.text(outputPath);
|
||||||
|
|
||||||
|
Dataset<Organization> organizationsInCountry = Utils
|
||||||
|
.readPath(spark, inputPath + "/organization", Organization.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference()
|
||||||
|
&& o.getCountry().getClassid().equals(country));
|
||||||
|
|
||||||
|
Dataset<Relation> relsOrganizationResults = Utils
|
||||||
|
.readPath(spark, inputPath + "/relation", Relation.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||||
|
r.getRelClass().equals(ModelConstants.IS_AUTHOR_INSTITUTION_OF));
|
||||||
|
|
||||||
|
organizationsInCountry
|
||||||
|
.joinWith(
|
||||||
|
relsOrganizationResults,
|
||||||
|
organizationsInCountry.col("id").equalTo(relsOrganizationResults.col("source")))
|
||||||
|
.map((MapFunction<Tuple2<Organization, Relation>, String>) t2 -> t2._2().getTarget(), Encoders.STRING())
|
||||||
|
.write()
|
||||||
|
.option(COMPRESSION, GZIP)
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.text(outputPath);
|
||||||
|
|
||||||
|
selectResultWithCountry(spark, inputPath, outputPath, country, "publication", Publication.class);
|
||||||
|
selectResultWithCountry(
|
||||||
|
spark, inputPath, outputPath, country, "dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class);
|
||||||
|
selectResultWithCountry(spark, inputPath, outputPath, country, "software", Software.class);
|
||||||
|
selectResultWithCountry(
|
||||||
|
spark, inputPath, outputPath, country, "otherresearchproduct", OtherResearchProduct.class);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> void selectResultWithCountry(SparkSession spark, String inputPath,
|
||||||
|
String outputPath, String country, String type, Class<R> inputClazz) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/" + type, inputClazz)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() && !p.getDataInfo().getInvisible() &&
|
||||||
|
p.getCountry() != null &&
|
||||||
|
p.getCountry().stream().anyMatch(c -> c.getClassid().equals(country)))
|
||||||
|
.map((MapFunction<R, String>) p -> p.getId(), Encoders.STRING())
|
||||||
|
.write()
|
||||||
|
.option(COMPRESSION, GZIP)
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.text(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isCountryInFunderJurisdiction(List<Field<String>> fundingtrees, String country) {
|
||||||
|
try {
|
||||||
|
final SAXReader reader = new SAXReader();
|
||||||
|
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||||
|
for (Field<String> fundingtree : fundingtrees) {
|
||||||
|
final Document doc = reader.read(new StringReader(fundingtree.getValue()));
|
||||||
|
if (((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText().equals(country)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
} catch (DocumentException | SAXException e) {
|
||||||
|
throw new IllegalArgumentException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,102 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 05/05/23
|
||||||
|
*/
|
||||||
|
public class AuthorResult implements Serializable {
|
||||||
|
private String authorId;
|
||||||
|
private String firstName;
|
||||||
|
private String lastName;
|
||||||
|
private String fullName;
|
||||||
|
private String orcid;
|
||||||
|
private String resultId;
|
||||||
|
private String rank;
|
||||||
|
private Boolean fromOrcid;
|
||||||
|
|
||||||
|
public Boolean getFromOrcid() {
|
||||||
|
return fromOrcid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFromOrcid(Boolean fromOrcid) {
|
||||||
|
this.fromOrcid = fromOrcid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFullName() {
|
||||||
|
return fullName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFullName(String fullName) {
|
||||||
|
this.fullName = fullName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAuthorId() {
|
||||||
|
return authorId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAuthorId(String authorId) {
|
||||||
|
this.authorId = authorId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getResultId() {
|
||||||
|
return resultId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setResultId(String resultId) {
|
||||||
|
this.resultId = resultId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRank() {
|
||||||
|
return rank;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRank(String rank) {
|
||||||
|
this.rank = rank;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return authorId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.authorId = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFirstName() {
|
||||||
|
return firstName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFirstName(String firstName) {
|
||||||
|
this.firstName = firstName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLastName() {
|
||||||
|
return lastName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLastName(String lastName) {
|
||||||
|
this.lastName = lastName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOrcid() {
|
||||||
|
return orcid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOrcid(String orcid) {
|
||||||
|
this.orcid = orcid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void autosetId() {
|
||||||
|
if (orcid != null) {
|
||||||
|
authorId = DHPUtils.md5(orcid);
|
||||||
|
} else {
|
||||||
|
authorId = DHPUtils.md5(resultId + rank);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 10/05/23
|
||||||
|
*/
|
||||||
|
/**
 * Shared constants and helpers for the CSV dump.
 */
public class Constants implements Serializable {
	/** Column separator used by every file produced by the CSV dump (tab-separated). */
	public static final String SEP = "\t";

	/**
	 * Returns the identifier unchanged. Quoting of identifiers was deliberately
	 * disabled; this hook is kept so callers do not need to change if quoting
	 * is ever reinstated.
	 *
	 * @param id the identifier to (not) quote; may be {@code null}
	 * @return {@code id}, untouched
	 */
	public static String addQuotes(String id) {
		return id;
	}
}
|
|
@ -0,0 +1,96 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static org.apache.commons.lang3.StringUtils.split;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 09/05/23
|
||||||
|
*/
|
||||||
|
//STEP 1
|
||||||
|
public class DumpCommunities implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(DumpCommunities.class);
|
||||||
|
private final BufferedWriter writer;
|
||||||
|
private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP
|
||||||
|
+ " description \n";
|
||||||
|
private final transient UtilCommunityAPI queryCommunityAPI;
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
DumpCommunities.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String nameNode = parser.get("nameNode");
|
||||||
|
log.info("nameNode: {}", nameNode);
|
||||||
|
|
||||||
|
final List<String> communities = Arrays.asList(split(parser.get("communities"), ";"));
|
||||||
|
|
||||||
|
final DumpCommunities dc = new DumpCommunities(outputPath, nameNode);
|
||||||
|
|
||||||
|
dc.writeCommunity(communities);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void writeCommunity(List<String> communities)
|
||||||
|
throws IOException {
|
||||||
|
writer.write(HEADER);
|
||||||
|
writer.flush();
|
||||||
|
|
||||||
|
for (String community : queryCommunityAPI
|
||||||
|
.getCommunityCsv(communities)) {
|
||||||
|
writer
|
||||||
|
.write(
|
||||||
|
community);
|
||||||
|
writer.write("\n");
|
||||||
|
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public DumpCommunities(String hdfsPath, String hdfsNameNode) throws Exception {
|
||||||
|
final Configuration conf = new Configuration();
|
||||||
|
queryCommunityAPI = new UtilCommunityAPI();
|
||||||
|
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
|
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
}
|
||||||
|
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
|
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,362 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static org.apache.commons.lang3.StringUtils.remove;
|
||||||
|
import static org.apache.commons.lang3.StringUtils.split;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collector;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.*;
|
||||||
|
import org.apache.spark.sql.*;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVAuthor;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVPid;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRelResAut;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVResult;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 04/05/23
|
||||||
|
*/
|
||||||
|
//STEP 3
|
||||||
|
public class SparkDumpResults implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkDumpResults.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkDumpResults.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String resultType = parser.get("resultType");
|
||||||
|
log.info("resultType: {}", resultType);
|
||||||
|
|
||||||
|
final String resultClassName = parser.get("resultTableName");
|
||||||
|
log.info("resultTableName: {}", resultClassName);
|
||||||
|
|
||||||
|
final String workingPath = parser.get("workingPath");
|
||||||
|
|
||||||
|
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
// Utils.removeOutputDir(spark, outputPath);
|
||||||
|
run(spark, inputPath, inputClazz, resultType, workingPath);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> void run(SparkSession spark, String inputPath,
|
||||||
|
Class<R> inputClazz, String resultType, String workingPath) {
|
||||||
|
|
||||||
|
Dataset<String> resultIds = spark.read().textFile(workingPath + "/resultIds");
|
||||||
|
// resultIds.foreach((ForeachFunction<String>) r -> System.out.println(r));
|
||||||
|
Dataset<R> results = Utils
|
||||||
|
.readPath(spark, inputPath + "/" + resultType, inputClazz)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() && !p.getDataInfo().getInvisible());
|
||||||
|
|
||||||
|
resultIds
|
||||||
|
.joinWith(results, resultIds.col("value").equalTo(results.col("id")))
|
||||||
|
.map((MapFunction<Tuple2<String, R>, R>) t2 -> t2._2(), Encoders.bean(inputClazz))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(workingPath + "/" + resultType + "/temp/result");
|
||||||
|
|
||||||
|
// map results
|
||||||
|
results = Utils.readPath(spark, workingPath + "/" + resultType + "/temp/result", inputClazz);
|
||||||
|
results
|
||||||
|
.map(
|
||||||
|
(MapFunction<R, CSVResult>) r -> mapResultInfo(r),
|
||||||
|
Encoders.bean(CSVResult.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(workingPath + "/" + resultType + "/result");
|
||||||
|
|
||||||
|
// map relations between pid and result
|
||||||
|
results
|
||||||
|
.flatMap((FlatMapFunction<R, CSVPid>) r -> {
|
||||||
|
List<CSVPid> pids = new ArrayList<>();
|
||||||
|
if (Optional.ofNullable(r.getPid()).isPresent() && r.getPid().size() > 0) {
|
||||||
|
pids.addAll(mapPid(r.getPid(), r.getId()));
|
||||||
|
}
|
||||||
|
return pids.iterator();
|
||||||
|
}, Encoders.bean(CSVPid.class))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(workingPath + "/" + resultType + "/result_pid");
|
||||||
|
|
||||||
|
// map authors from the result
|
||||||
|
// per ogni autore nel result
|
||||||
|
// se l'autore ha un orcid il suo id dipende dall'orcid (tipo md5(orcid))
|
||||||
|
// se non ha orcid il suo id si costruisce come result_id + authorrank ( se non ha il rank si sua
|
||||||
|
// la sua posizione nell'insieme degli autori) sempre con md5
|
||||||
|
results
|
||||||
|
.flatMap((FlatMapFunction<R, AuthorResult>) r -> {
|
||||||
|
int count = 0;
|
||||||
|
List<AuthorResult> arl = new ArrayList<>();
|
||||||
|
Set<String> authorIds = new HashSet();
|
||||||
|
if (Optional.ofNullable(r.getAuthor()).isPresent()) {
|
||||||
|
for (Author a : r.getAuthor()) {
|
||||||
|
count += 1;
|
||||||
|
AuthorResult ar = new AuthorResult();
|
||||||
|
ar.setResultId(r.getId());
|
||||||
|
if (Optional.ofNullable(a.getRank()).isPresent()) {
|
||||||
|
if (a.getRank() > 0) {
|
||||||
|
ar.setRank(String.valueOf(a.getRank()));
|
||||||
|
} else {
|
||||||
|
ar.setRank(String.valueOf(count));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ar.setFirstName(removeBreaks(a.getName()));
|
||||||
|
ar.setLastName(removeBreaks(a.getSurname()));
|
||||||
|
ar.setFullName(removeBreaks(a.getFullname()));
|
||||||
|
Tuple2<String, Boolean> orcid = getOrcid(a.getPid());
|
||||||
|
if (Optional.ofNullable(orcid).isPresent()) {
|
||||||
|
ar.setOrcid(orcid._1());
|
||||||
|
ar.setFromOrcid(orcid._2());
|
||||||
|
}
|
||||||
|
|
||||||
|
ar.autosetId();
|
||||||
|
|
||||||
|
if (!authorIds.contains(ar.getAuthorId())) {
|
||||||
|
arl.add(ar);
|
||||||
|
authorIds.add(ar.getAuthorId());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return arl.iterator();
|
||||||
|
}, Encoders.bean(AuthorResult.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(workingPath + "/" + resultType + "/temp/authorresult");
|
||||||
|
|
||||||
|
Dataset<AuthorResult> authorResult = Utils
|
||||||
|
.readPath(spark, workingPath + "/" + resultType + "/temp/authorresult", AuthorResult.class);
|
||||||
|
// map the relation between author and result
|
||||||
|
authorResult
|
||||||
|
.map(
|
||||||
|
(MapFunction<AuthorResult, CSVRelResAut>) ar -> {
|
||||||
|
CSVRelResAut ret = new CSVRelResAut();
|
||||||
|
ret.setResult_id(ar.getResultId());
|
||||||
|
ret.setAuthor_id(ar.getAuthorId());
|
||||||
|
return ret;
|
||||||
|
},
|
||||||
|
Encoders.bean(CSVRelResAut.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(workingPath + "/" + resultType + "/result_author");
|
||||||
|
|
||||||
|
// ma the authors in the working dir. I do not want to have them repeated. If I have an orcid as id, I choose
|
||||||
|
// the one from orcid if any
|
||||||
|
authorResult
|
||||||
|
.groupByKey((MapFunction<AuthorResult, String>) ar -> ar.getAuthorId(), Encoders.STRING())
|
||||||
|
.mapGroups(
|
||||||
|
(MapGroupsFunction<String, AuthorResult, CSVAuthor>) (k, it) -> {
|
||||||
|
AuthorResult first = it.next();
|
||||||
|
if (!Optional.ofNullable(first.getFromOrcid()).isPresent() || first.getFromOrcid())
|
||||||
|
return getAuthorDump(first);
|
||||||
|
while (it.hasNext()) {
|
||||||
|
AuthorResult ar = it.next();
|
||||||
|
if (ar.getFromOrcid())
|
||||||
|
return getAuthorDump(ar);
|
||||||
|
}
|
||||||
|
return getAuthorDump(first);
|
||||||
|
},
|
||||||
|
Encoders.bean(CSVAuthor.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(workingPath + "/" + resultType + "/author");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<CSVPid> mapPid(List<StructuredProperty> pid, String resultId) {
|
||||||
|
return pid
|
||||||
|
.stream()
|
||||||
|
.map(p -> p.getQualifier().getClassid().toLowerCase() + "@" + p.getValue().toLowerCase())
|
||||||
|
.distinct()
|
||||||
|
.map(p -> {
|
||||||
|
CSVPid ret = new CSVPid();
|
||||||
|
ret.setId(DHPUtils.md5(p + "@" + resultId));
|
||||||
|
ret.setResult_id(resultId);
|
||||||
|
ret.setPid(split(p, "@")[1]);
|
||||||
|
ret.setType(split(p, "@")[0]);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CSVAuthor getAuthorDump(AuthorResult ar) {
|
||||||
|
CSVAuthor ret = new CSVAuthor();
|
||||||
|
ret.setFirstname(ar.getFirstName());
|
||||||
|
|
||||||
|
ret.setId(ar.getAuthorId());
|
||||||
|
ret.setLastname(ar.getLastName());
|
||||||
|
|
||||||
|
ret.setFullname(ar.getFullName());
|
||||||
|
|
||||||
|
if (ar.getOrcid() != null) {
|
||||||
|
ret.setOrcid(ar.getOrcid());
|
||||||
|
ret.setFromOrcid(ar.getFromOrcid());
|
||||||
|
} else {
|
||||||
|
ret.setOrcid("");
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
|
||||||
|
if (!Optional.ofNullable(pid).isPresent())
|
||||||
|
return null;
|
||||||
|
if (pid.size() == 0)
|
||||||
|
return null;
|
||||||
|
for (StructuredProperty p : pid) {
|
||||||
|
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
|
||||||
|
return new Tuple2<>(p.getValue(), Boolean.TRUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (StructuredProperty p : pid) {
|
||||||
|
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) {
|
||||||
|
return new Tuple2<>(p.getValue(), Boolean.FALSE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getFieldValue(Field<String> input) {
|
||||||
|
if (input != null &&
|
||||||
|
StringUtils.isNotEmpty(input.getValue())) {
|
||||||
|
return removeBreaks(input.getValue());
|
||||||
|
} else {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
	/**
	 * Maps a graph Result (publication/dataset/software/ORP) onto the flat
	 * CSVResult bean used by the CSV dump. String-typed fields are passed
	 * through removeBreaks so embedded newlines/tabs/quotes cannot corrupt
	 * the CSV layout; absent optional fields become the empty string.
	 */
	private static <R extends Result> CSVResult mapResultInfo(R r) {
		CSVResult ret = new CSVResult();
		ret.setId(removeBreaks(r.getId()));
		ret.setType(removeBreaks(r.getResulttype().getClassid()));
		ret.setTitle(getTitle(r.getTitle()));
		ret.setDescription(getAbstract(r.getDescription()));
		ret.setAccessright(removeBreaks(r.getBestaccessright().getClassid()));
		ret.setPublication_date(removeBreaks(getFieldValue(r.getDateofacceptance())));
		ret.setPublisher(removeBreaks(getFieldValue(r.getPublisher())));

		// keywords: lower-cased, de-duplicated subject values joined by ", "
		if (Optional.ofNullable(r.getSubject()).isPresent())
			ret.setKeywords(String.join(", ", r.getSubject().stream().map(s -> {
				if (StringUtils.isNotEmpty(s.getValue()))
					return removeBreaks(s.getValue().toLowerCase());
				else
					return null;
			}).filter(Objects::nonNull).distinct().collect(Collectors.toList())));
		else
			ret.setKeywords("");

		// country: comma-separated list of country class ids (duplicates kept)
		if (Optional.ofNullable(r.getCountry()).isPresent())
			ret
				.setCountry(
					String.join(", ", r.getCountry().stream().map(Country::getClassid).collect(Collectors.toList())));
		else
			ret.setCountry("");

		// language: single class id, "" when missing or empty
		if (Optional.ofNullable(r.getLanguage()).isPresent() && StringUtils.isNotEmpty(r.getLanguage().getClassid())) {
			ret.setLanguage(r.getLanguage().getClassid());
		} else {
			ret.setLanguage("");
		}

		return ret;
	}
|
||||||
|
|
||||||
|
private static String getAbstract(List<Field<String>> description) {
|
||||||
|
if (description == null)
|
||||||
|
return "";
|
||||||
|
for (Field<String> abs : description) {
|
||||||
|
if (StringUtils.isNotEmpty(abs.getValue())) {
|
||||||
|
return removeBreaks(abs.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getTitle(List<StructuredProperty> titles) {
|
||||||
|
String firstTitle = null;
|
||||||
|
for (StructuredProperty title : titles) {
|
||||||
|
if (StringUtils.isEmpty(firstTitle)) {
|
||||||
|
if (StringUtils.isNotEmpty(title.getValue()))
|
||||||
|
firstTitle = removeBreaks(title.getValue());
|
||||||
|
}
|
||||||
|
if (title.getQualifier().getClassid().equals(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid())) {
|
||||||
|
if (StringUtils.isNotEmpty(title.getValue()))
|
||||||
|
return removeBreaks(title.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (firstTitle != null) {
|
||||||
|
return removeBreaks(firstTitle);
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String removeBreaks(String input) {
|
||||||
|
if (StringUtils.isNotEmpty(input))
|
||||||
|
return input
|
||||||
|
.replace("\n", " ")
|
||||||
|
.replace("\t", " ")
|
||||||
|
.replace("\r", " ")
|
||||||
|
// .replace("\\", " ")
|
||||||
|
.replace("\"", " ");
|
||||||
|
|
||||||
|
return input;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,133 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.Row;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVAuthor;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVPid;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRelResAut;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVResult;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 10/05/23
|
||||||
|
*/
|
||||||
|
//STEP 4
|
||||||
|
public class SparkMoveOnSigleDir implements Serializable {
|
||||||
|
|
||||||
|
// All the products saved in different directories are put under the same one.
|
||||||
|
// For the authors also a step of reconciliation mast be done, since the same author id can be saved in more than
|
||||||
|
// one directory
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkMoveOnSigleDir.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkMoveOnSigleDir.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String workingPath = parser.get("workingPath");
|
||||||
|
log.info("workingPath: {}", workingPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
// Utils.removeOutputDir(spark, outputPath);
|
||||||
|
run(spark, outputPath, workingPath);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> void run(SparkSession spark, String outputPath,
|
||||||
|
String workingPath) {
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, workingPath + "/publication/result", CSVResult.class)
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/dataset/result", CSVResult.class))
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/software/result", CSVResult.class))
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/otherresearchproduct/result", CSVResult.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("header", "true")
|
||||||
|
.option("delimiter", Constants.SEP)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.csv(outputPath + "/result");
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, workingPath + "/publication/result_pid", CSVPid.class)
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/dataset/result_pid", CSVPid.class))
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/software/result_pid", CSVPid.class))
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/otherresearchproduct/result_pid", CSVPid.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("header", "true")
|
||||||
|
.option("delimiter", Constants.SEP)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.csv(outputPath + "/result_pid");
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, workingPath + "/publication/result_author", CSVRelResAut.class)
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/dataset/result_author", CSVRelResAut.class))
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/software/result_author", CSVRelResAut.class))
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/otherresearchproduct/result_author", CSVRelResAut.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("header", "true")
|
||||||
|
.option("delimiter", Constants.SEP)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.csv(outputPath + "/result_author");
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, workingPath + "/publication/author", CSVAuthor.class)
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/dataset/author", CSVAuthor.class))
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/software/author", CSVAuthor.class))
|
||||||
|
.union(Utils.readPath(spark, workingPath + "/otherresearchproduct/author", CSVAuthor.class))
|
||||||
|
.groupByKey((MapFunction<CSVAuthor, String>) r -> r.getId(), Encoders.STRING())
|
||||||
|
.mapGroups(
|
||||||
|
(MapGroupsFunction<String, CSVAuthor, CSVAuthor>) (k, it) -> it.next(), Encoders.bean(CSVAuthor.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("header", "true")
|
||||||
|
.option("delimiter", Constants.SEP)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.csv(outputPath + "/author");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,227 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVCitation;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVRELCommunityResult;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 04/05/23
|
||||||
|
*/
|
||||||
|
//STEP 2
|
||||||
|
public class SparkSelectResultsAndDumpRelations implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkSelectResultsAndDumpRelations.class);
|
||||||
|
private static String RESULT_COMMUNITY_TABLE = "/result_community";
|
||||||
|
private static String COMMUNITY_RESULT_IDS = "/communityResultIds";
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkSelectResultsAndDumpRelations.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String workingPath = parser.get("workingPath");
|
||||||
|
|
||||||
|
List<String> communityList = null;
|
||||||
|
Optional<String> communities = Optional.ofNullable(parser.get("communities"));
|
||||||
|
if (communities.isPresent()) {
|
||||||
|
communityList = Arrays.asList(communities.get().split(";"));
|
||||||
|
}
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
List<String> finalCommunityList = communityList;
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
// Utils.removeOutputDir(spark, outputPath);
|
||||||
|
run(spark, inputPath, outputPath, workingPath, finalCommunityList);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void run(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
String workingPath,
|
||||||
|
List<String> communityList) {
|
||||||
|
|
||||||
|
// select the result ids related to the set of communities considered
|
||||||
|
writeCommunityRelatedIds(
|
||||||
|
spark, inputPath + "/publication", Publication.class, communityList, workingPath + COMMUNITY_RESULT_IDS);
|
||||||
|
writeCommunityRelatedIds(
|
||||||
|
spark, inputPath + "/dataset", Dataset.class, communityList, workingPath + COMMUNITY_RESULT_IDS);
|
||||||
|
writeCommunityRelatedIds(
|
||||||
|
spark, inputPath + "/software", Software.class, communityList, workingPath + COMMUNITY_RESULT_IDS);
|
||||||
|
writeCommunityRelatedIds(
|
||||||
|
spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class, communityList,
|
||||||
|
workingPath + COMMUNITY_RESULT_IDS);
|
||||||
|
|
||||||
|
// write the relations result communities
|
||||||
|
writeCommunityResultRelations(
|
||||||
|
spark, inputPath + "/publication", Publication.class, communityList, outputPath + RESULT_COMMUNITY_TABLE);
|
||||||
|
writeCommunityResultRelations(
|
||||||
|
spark, inputPath + "/dataset", Dataset.class, communityList, outputPath + RESULT_COMMUNITY_TABLE);
|
||||||
|
writeCommunityResultRelations(
|
||||||
|
spark, inputPath + "/software", Software.class, communityList, outputPath + RESULT_COMMUNITY_TABLE);
|
||||||
|
writeCommunityResultRelations(
|
||||||
|
spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class, communityList,
|
||||||
|
outputPath + RESULT_COMMUNITY_TABLE);
|
||||||
|
|
||||||
|
// select the relations with semantics cites
|
||||||
|
org.apache.spark.sql.Dataset<Relation> relations = Utils
|
||||||
|
.readPath(spark, inputPath + "/relation", Relation.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||||
|
r.getRelClass().equals(ModelConstants.CITES));
|
||||||
|
|
||||||
|
// select the results target of the selected relations having as source one of the results related to the
|
||||||
|
// communities
|
||||||
|
org.apache.spark.sql.Dataset<String> resultIds = spark
|
||||||
|
.read()
|
||||||
|
.textFile(workingPath + COMMUNITY_RESULT_IDS)
|
||||||
|
.distinct();
|
||||||
|
|
||||||
|
resultIds
|
||||||
|
.joinWith(relations, resultIds.col("value").equalTo(relations.col("source")), "left")
|
||||||
|
.flatMap((FlatMapFunction<Tuple2<String, Relation>, String>) t2 -> {
|
||||||
|
if (Optional.ofNullable(t2._2()).isPresent()) {
|
||||||
|
return Arrays.asList(t2._1(), t2._2().getTarget()).iterator();
|
||||||
|
} else {
|
||||||
|
return Arrays.asList(t2._1()).iterator();
|
||||||
|
}
|
||||||
|
}, Encoders.STRING())
|
||||||
|
.distinct()
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
|
||||||
|
.text(workingPath + "/resultIds");
|
||||||
|
|
||||||
|
resultIds
|
||||||
|
.joinWith(relations, resultIds.col("value").equalTo(relations.col("source")))
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<String, Relation>, CSVCitation>) t2 -> mapToCitation(t2._2()),
|
||||||
|
Encoders.bean(CSVCitation.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.option("header", "true")
|
||||||
|
.option("delimiter", Constants.SEP)
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.csv(outputPath + "/relation");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CSVCitation mapToCitation(Relation relation) {
|
||||||
|
CSVCitation ret = new CSVCitation();
|
||||||
|
ret.setId(DHPUtils.md5(relation.getSource() + relation.getRelClass().toLowerCase() + relation.getTarget()));
|
||||||
|
ret.setResult_id_cites(relation.getSource());
|
||||||
|
ret.setResult_id_cited(relation.getTarget());
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> void writeCommunityResultRelations(SparkSession spark, String inputPath,
|
||||||
|
Class<R> clazz, List<String> communityList, String outputPath) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, clazz)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() &&
|
||||||
|
!p.getDataInfo().getInvisible())
|
||||||
|
.flatMap((FlatMapFunction<R, CSVRELCommunityResult>) p -> {
|
||||||
|
Set<String> inserted = new HashSet<>();
|
||||||
|
List<CSVRELCommunityResult> ret = new ArrayList<>();
|
||||||
|
|
||||||
|
for (String context : p
|
||||||
|
.getContext()
|
||||||
|
.stream()
|
||||||
|
.map(Context::getId)
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toList())) {
|
||||||
|
String cId = context.contains("::")
|
||||||
|
? context.substring(0, context.indexOf("::"))
|
||||||
|
: context;
|
||||||
|
if (communityList.contains(cId) && !inserted.contains(cId)) {
|
||||||
|
CSVRELCommunityResult crc = new CSVRELCommunityResult();
|
||||||
|
crc.setResult_id(p.getId());
|
||||||
|
crc.setCommunity_id(DHPUtils.md5(cId));
|
||||||
|
ret.add(crc);
|
||||||
|
inserted.add(cId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret.iterator();
|
||||||
|
}, Encoders.bean(CSVRELCommunityResult.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("header", "true")
|
||||||
|
.option("delimiter", Constants.SEP)
|
||||||
|
.csv(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> void writeCommunityRelatedIds(SparkSession spark, String inputPath,
|
||||||
|
Class<R> clazz, List<String> communityList, String outputPath) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, clazz)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() &&
|
||||||
|
!p.getDataInfo().getInvisible() &&
|
||||||
|
isRelatedToCommunities(p, communityList))
|
||||||
|
.map((MapFunction<R, String>) Result::getId, Encoders.STRING())
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.text(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> boolean isRelatedToCommunities(R p, List<String> communityList) {
|
||||||
|
return p
|
||||||
|
.getContext()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
c -> communityList.contains(c.getId()) ||
|
||||||
|
(c.getId().contains("::")
|
||||||
|
&& communityList.contains(c.getId().substring(0, c.getId().indexOf("::")))));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,68 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 11/05/23
|
||||||
|
*/
|
||||||
|
public class CSVAuthor implements Serializable {
	// CSV row bean for the author table.
	// NOTE(review): every String setter wraps its argument with
	// Constants.addQuotes, and these beans are also rebuilt reflectively
	// (e.g. Encoders.bean in step 4), so quoting is re-applied on each bean
	// round trip — confirm addQuotes is idempotent, otherwise values end up
	// quoted more than once.
	private String id;
	private String firstname;
	private String lastname;
	private String fullname;
	// "" when the author has no ORCID
	private String orcid;
	// TRUE: authoritative orcid pid; FALSE: orcid_pending pid;
	// may remain null when no orcid is present
	private Boolean fromOrcid;

	public Boolean getFromOrcid() {
		return fromOrcid;
	}

	public void setFromOrcid(Boolean fromOrcid) {
		this.fromOrcid = fromOrcid;
	}

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = Constants.addQuotes(id);
	}

	public String getFirstname() {
		return firstname;
	}

	public void setFirstname(String firstname) {
		this.firstname = Constants.addQuotes(firstname);
	}

	public String getLastname() {
		return lastname;
	}

	public void setLastname(String lastname) {
		this.lastname = Constants.addQuotes(lastname);
	}

	public String getFullname() {
		return fullname;
	}

	public void setFullname(String fullname) {
		this.fullname = Constants.addQuotes(fullname);
	}

	public String getOrcid() {
		return orcid;
	}

	public void setOrcid(String orcid) {
		this.orcid = Constants.addQuotes(orcid);
	}

}
|
|
@ -0,0 +1,40 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 11/05/23
|
||||||
|
*/
|
||||||
|
public class CSVCitation implements Serializable {
	// CSV row bean for the citation (relation) table.
	// NOTE(review): setters wrap values with Constants.addQuotes; if instances
	// are round-tripped through bean encoders the quoting is applied again —
	// verify addQuotes is idempotent.
	// md5(source + relclass + target), computed by the caller
	private String id;
	private String result_id_cites;
	private String result_id_cited;

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = Constants.addQuotes(id);
	}

	public String getResult_id_cites() {
		return result_id_cites;
	}

	public void setResult_id_cites(String result_id_cites) {
		this.result_id_cites = Constants.addQuotes(result_id_cites);
	}

	public String getResult_id_cited() {
		return result_id_cited;
	}

	public void setResult_id_cited(String result_id_cited) {
		this.result_id_cited = Constants.addQuotes(result_id_cited);
	}
}
|
|
@ -0,0 +1,50 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 11/05/23
|
||||||
|
*/
|
||||||
|
public class CSVPid implements Serializable {
	// CSV row bean for the result-pid table.
	// NOTE(review): setters wrap values with Constants.addQuotes; if instances
	// are round-tripped through bean encoders the quoting is applied again —
	// verify addQuotes is idempotent.

	// md5("type@pid@resultId"), computed by the caller
	private String id;
	private String result_id;
	private String pid;
	// the pid qualifier class id (e.g. doi), lower-cased by the caller
	private String type;

	public String getResult_id() {
		return result_id;
	}

	public void setResult_id(String result_id) {
		this.result_id = Constants.addQuotes(result_id);
	}

	public String getPid() {
		return pid;
	}

	public void setPid(String pid) {
		this.pid = Constants.addQuotes(pid);
	}

	public String getType() {
		return type;
	}

	public void setType(String type) {
		this.type = Constants.addQuotes(type);
	}

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = Constants.addQuotes(id);
	}
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 11/05/23
|
||||||
|
*/
|
||||||
|
public class CSVRELCommunityResult implements Serializable {
	// CSV row bean linking a result to a community (community_id is the md5
	// of the community short name, set by the caller).
	// NOTE(review): setters wrap values with Constants.addQuotes; if instances
	// are round-tripped through bean encoders the quoting is applied again —
	// verify addQuotes is idempotent.
	private String result_id;
	private String community_id;

	public String getResult_id() {
		return result_id;
	}

	public void setResult_id(String result_id) {
		this.result_id = Constants.addQuotes(result_id);
	}

	public String getCommunity_id() {
		return community_id;
	}

	public void setCommunity_id(String community_id) {
		this.community_id = Constants.addQuotes(community_id);
	}
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 11/05/23
|
||||||
|
*/
|
||||||
|
public class CSVRelResAut implements Serializable {
	// CSV row bean linking a result to an author.
	// NOTE(review): setters wrap values with Constants.addQuotes; if instances
	// are round-tripped through bean encoders the quoting is applied again —
	// verify addQuotes is idempotent.
	private String result_id;
	private String author_id;

	public String getResult_id() {
		return result_id;
	}

	public void setResult_id(String result_id) {
		this.result_id = Constants.addQuotes(result_id);
	}

	public String getAuthor_id() {
		return author_id;
	}

	public void setAuthor_id(String author_id) {
		this.author_id = Constants.addQuotes(author_id);
	}
}
|
|
@ -0,0 +1,113 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonGetter;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonSetter;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Country;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import sun.swing.StringUIClientPropertyKey;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 11/05/23
|
||||||
|
*/
|
||||||
|
public class CSVResult implements Serializable {
	// CSV row bean for the result table; fields mirror the dumped columns.
	// NOTE(review): every setter wraps its argument with Constants.addQuotes,
	// and these beans are round-tripped through Spark bean encoders in step 4,
	// which re-invokes the setters — confirm addQuotes is idempotent, otherwise
	// values end up quoted more than once.
	// NOTE(review): the file imports sun.swing.StringUIClientPropertyKey and
	// several unused classes — the import section should be cleaned up.
	private String id;
	// the result type class id (publication/dataset/software/other)
	private String type;
	private String title;
	private String description;
	private String accessright;
	private String publication_date;
	private String publisher;
	// ", "-joined, lower-cased, de-duplicated subject values; "" when absent
	private String keywords;
	// ", "-joined country class ids; "" when absent
	private String country;
	// language class id; "" when absent
	private String language;

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = Constants.addQuotes(id);
	}

	public String getType() {
		return type;
	}

	public void setType(String type) {
		this.type = Constants.addQuotes(type);
	}

	public String getTitle() {
		return title;
	}

	public void setTitle(String title) {
		this.title = Constants.addQuotes(title);
	}

	public String getDescription() {
		return description;
	}

	public void setDescription(String description) {
		this.description = Constants.addQuotes(description);
	}

	public String getAccessright() {
		return accessright;
	}

	public void setAccessright(String accessright) {
		this.accessright = Constants.addQuotes(accessright);
	}

	public String getPublication_date() {
		return publication_date;
	}

	public void setPublication_date(String publication_date) {
		this.publication_date = Constants.addQuotes(publication_date);
	}

	public String getPublisher() {
		return publisher;
	}

	public void setPublisher(String publisher) {
		this.publisher = Constants.addQuotes(publisher);
	}

	public String getKeywords() {
		return keywords;
	}

	public void setKeywords(String keywords) {
		this.keywords = Constants.addQuotes(keywords);
	}

	public String getCountry() {
		return country;
	}

	public void setCountry(String country) {
		this.country = Constants.addQuotes(country);
	}

	public String getLanguage() {
		return language;
	}

	public void setLanguage(String language) {
		this.language = Constants.addQuotes(language);
	}

}
|
|
@ -11,6 +11,7 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.*;
|
import org.apache.spark.sql.*;
|
||||||
|
@ -18,11 +19,15 @@ import org.jetbrains.annotations.NotNull;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.amazonaws.transform.SimpleTypeUnmarshallers;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
||||||
import eu.dnetlib.dhp.oa.model.community.Funder;
|
import eu.dnetlib.dhp.oa.model.community.Funder;
|
||||||
import eu.dnetlib.dhp.oa.model.community.Project;
|
import eu.dnetlib.dhp.oa.model.community.Project;
|
||||||
|
import io.netty.util.internal.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC
|
* Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC
|
||||||
|
@ -30,6 +35,7 @@ import eu.dnetlib.dhp.oa.model.community.Project;
|
||||||
*/
|
*/
|
||||||
public class SparkDumpFunderResults implements Serializable {
|
public class SparkDumpFunderResults implements Serializable {
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkDumpFunderResults.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkDumpFunderResults.class);
|
||||||
|
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
|
@ -65,14 +71,24 @@ public class SparkDumpFunderResults implements Serializable {
|
||||||
.union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class))
|
.union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class))
|
||||||
.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
|
.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
|
||||||
log.info("Number of result {}", result.count());
|
log.info("Number of result {}", result.count());
|
||||||
|
|
||||||
Dataset<String> tmp = result
|
Dataset<String> tmp = result
|
||||||
.flatMap((FlatMapFunction<CommunityResult, String>) cr -> cr.getProjects().stream().map(p -> {
|
.flatMap((FlatMapFunction<CommunityResult, String>) cr -> cr.getProjects().stream().map(p -> {
|
||||||
return getFunderName(p);
|
return getFunderName(p);
|
||||||
}).collect(Collectors.toList()).iterator(), Encoders.STRING())
|
}).collect(Collectors.toList()).iterator(), Encoders.STRING())
|
||||||
.distinct();
|
.distinct();
|
||||||
List<String> funderList = tmp.collectAsList();
|
List<String> funderList = tmp.collectAsList();
|
||||||
funderList.forEach(funder -> {
|
funderList.stream().parallel().forEach(funder -> {
|
||||||
dumpResults(funder, result, outputPath);
|
result
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<CommunityResult>) r -> Optional.ofNullable(r.getProjects()).isPresent() &&
|
||||||
|
r.getProjects().stream().anyMatch(p -> getFunderName(p).equals(funder)))
|
||||||
|
.map((MapFunction<CommunityResult, String>) r -> MAPPER.writeValueAsString(r), Encoders.STRING())
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.text(outputPath + "/" + funder);
|
||||||
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,14 +97,21 @@ public class SparkDumpFunderResults implements Serializable {
|
||||||
Optional<Funder> ofunder = Optional.ofNullable(p.getFunder());
|
Optional<Funder> ofunder = Optional.ofNullable(p.getFunder());
|
||||||
if (ofunder.isPresent()) {
|
if (ofunder.isPresent()) {
|
||||||
String fName = ofunder.get().getShortName();
|
String fName = ofunder.get().getShortName();
|
||||||
if (fName.equalsIgnoreCase("ec")) {
|
if (StringUtil.isNullOrEmpty(fName))
|
||||||
|
if (p.getId().indexOf("_") < 0)
|
||||||
|
return p.getId().substring(0, p.getId().indexOf(":"));
|
||||||
|
else
|
||||||
|
return p.getId().substring(0, p.getId().indexOf("_"));
|
||||||
|
// ofunder.get().getName();
|
||||||
|
if (fName.equalsIgnoreCase("ec"))
|
||||||
fName += "_" + ofunder.get().getFundingStream();
|
fName += "_" + ofunder.get().getFundingStream();
|
||||||
}
|
|
||||||
return fName;
|
return fName;
|
||||||
} else {
|
} else {
|
||||||
String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase();
|
String fName = p.getId().substring(0, p.getId().indexOf("_")).toUpperCase();
|
||||||
if (fName.equalsIgnoreCase("ec")) {
|
if (fName.equalsIgnoreCase("ec")) {
|
||||||
if (p.getId().contains("h2020")) {
|
if (p.getId().contains("he")) {
|
||||||
|
fName += "_HE";
|
||||||
|
} else if (p.getId().contains("h2020")) {
|
||||||
fName += "_H2020";
|
fName += "_H2020";
|
||||||
} else {
|
} else {
|
||||||
fName += "_FP7";
|
fName += "_FP7";
|
||||||
|
@ -106,23 +129,4 @@ public class SparkDumpFunderResults implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void dumpResults(String funder, Dataset<CommunityResult> results, String outputPath) {
|
|
||||||
results.map((MapFunction<CommunityResult, CommunityResult>) r -> {
|
|
||||||
if (!Optional.ofNullable(r.getProjects()).isPresent()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
for (Project p : r.getProjects()) {
|
|
||||||
String fName = getFunderName(p);
|
|
||||||
if (fName.equalsIgnoreCase(funder)) {
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}, Encoders.bean(CommunityResult.class))
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(outputPath + "/" + funder);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
@ -17,6 +18,8 @@ import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
||||||
|
@ -99,13 +102,19 @@ public class SparkResultLinkedToProject implements Serializable {
|
||||||
.map(
|
.map(
|
||||||
t2._1(),
|
t2._1(),
|
||||||
communityMap, Constants.DUMPTYPE.FUNDER.getType());
|
communityMap, Constants.DUMPTYPE.FUNDER.getType());
|
||||||
cr.setProjects(t2._2().getProjectsList());
|
if (cr != null) {
|
||||||
|
cr.setProjects(t2._2().getProjectsList());
|
||||||
|
}
|
||||||
return cr;
|
return cr;
|
||||||
}, Encoders.bean(CommunityResult.class))
|
}, Encoders.bean(CommunityResult.class))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.map(
|
||||||
|
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
|
||||||
|
Encoders.STRING())
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(outputPath);
|
.text(outputPath);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,270 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.organizationonly;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.Node;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Container;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Provenance;
|
||||||
|
import eu.dnetlib.dhp.oa.model.Result;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.*;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.Datasource;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.Organization;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.Project;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spark Job that fires the dump for the entites
|
||||||
|
*/
|
||||||
|
public class SparkDumpOrganizationJob implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory
|
||||||
|
.getLogger(eu.dnetlib.dhp.oa.graph.dump.organizationonly.SparkDumpOrganizationJob.class);
|
||||||
|
public static final String COMPRESSION = "compression";
|
||||||
|
public static final String GZIP = "gzip";
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Boolean.TRUE;
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = "/tmp/prod_provision/graph/20_graph_blacklisted/";
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = "/tmp/miriam/organizationsOnly/";
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
// Utils.removeOutputDir(spark, outputPath);
|
||||||
|
organizationMap(spark, inputPath, outputPath);
|
||||||
|
// relationMap2(spark, inputPath, outputPath);
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void relationMap2(SparkSession spark, String inputPath, String outputPath) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "relation", Relation.class)
|
||||||
|
.filter((FilterFunction<Relation>) r -> r.getRelType().equalsIgnoreCase("organizationOrganization"))
|
||||||
|
.map((MapFunction<Relation, eu.dnetlib.dhp.oa.model.graph.Relation>) relation -> {
|
||||||
|
eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation();
|
||||||
|
relNew
|
||||||
|
.setSource(getEntityId(relation.getSource(), ENTITY_ID_SEPARATOR));
|
||||||
|
relNew.setSourceType(ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2)));
|
||||||
|
|
||||||
|
relNew
|
||||||
|
.setTarget(getEntityId(relation.getTarget(), ENTITY_ID_SEPARATOR));
|
||||||
|
relNew.setTargetType(ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2)));
|
||||||
|
|
||||||
|
relNew
|
||||||
|
.setReltype(
|
||||||
|
RelType
|
||||||
|
.newInstance(
|
||||||
|
relation.getRelClass(),
|
||||||
|
relation.getSubRelType()));
|
||||||
|
|
||||||
|
Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
|
||||||
|
if (odInfo.isPresent()) {
|
||||||
|
DataInfo dInfo = odInfo.get();
|
||||||
|
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
|
||||||
|
Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
|
||||||
|
relNew
|
||||||
|
.setProvenance(
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
dInfo.getProvenanceaction().getClassname(),
|
||||||
|
dInfo.getTrust()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (Boolean.TRUE.equals(relation.getValidated())) {
|
||||||
|
relNew.setValidated(relation.getValidated());
|
||||||
|
relNew.setValidationDate(relation.getValidationDate());
|
||||||
|
}
|
||||||
|
|
||||||
|
return relNew;
|
||||||
|
}, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "relation");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void relationMap(SparkSession spark, String inputPath, String outputPath) {
|
||||||
|
Dataset<eu.dnetlib.dhp.schema.oaf.Organization> organization = Utils
|
||||||
|
.readPath(spark, inputPath + "organization", eu.dnetlib.dhp.schema.oaf.Organization.class);
|
||||||
|
Dataset<Relation> rels = Utils.readPath(spark, inputPath + "relation", Relation.class);
|
||||||
|
organization
|
||||||
|
.joinWith(rels, organization.col("id").equalTo(rels.col("source")), "left")
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Organization, Relation>, Relation>) t2 -> t2._2(),
|
||||||
|
Encoders.bean(Relation.class))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json("/tmp/orgSource");
|
||||||
|
|
||||||
|
rels = Utils.readPath(spark, "/tmp/orgSource", Relation.class);
|
||||||
|
|
||||||
|
organization
|
||||||
|
.joinWith(rels, organization.col("id").equalTo(rels.col("target")), "left")
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Organization, Relation>, Relation>) t2 -> t2._2(),
|
||||||
|
Encoders.bean(Relation.class))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json("/tmp/orgSourceTarget");
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, "/tmp/orgSourceTarget", Relation.class)
|
||||||
|
.map((MapFunction<Relation, eu.dnetlib.dhp.oa.model.graph.Relation>) relation -> {
|
||||||
|
eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation();
|
||||||
|
relNew
|
||||||
|
.setSource(getEntityId(relation.getSource(), ENTITY_ID_SEPARATOR));
|
||||||
|
relNew.setSourceType(ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2)));
|
||||||
|
|
||||||
|
relNew
|
||||||
|
.setTarget(getEntityId(relation.getTarget(), ENTITY_ID_SEPARATOR));
|
||||||
|
relNew.setTargetType(ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2)));
|
||||||
|
|
||||||
|
relNew
|
||||||
|
.setReltype(
|
||||||
|
RelType
|
||||||
|
.newInstance(
|
||||||
|
relation.getRelClass(),
|
||||||
|
relation.getSubRelType()));
|
||||||
|
|
||||||
|
Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
|
||||||
|
if (odInfo.isPresent()) {
|
||||||
|
DataInfo dInfo = odInfo.get();
|
||||||
|
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
|
||||||
|
Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
|
||||||
|
relNew
|
||||||
|
.setProvenance(
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
dInfo.getProvenanceaction().getClassname(),
|
||||||
|
dInfo.getTrust()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (Boolean.TRUE.equals(relation.getValidated())) {
|
||||||
|
relNew.setValidated(relation.getValidated());
|
||||||
|
relNew.setValidationDate(relation.getValidationDate());
|
||||||
|
}
|
||||||
|
|
||||||
|
return relNew;
|
||||||
|
}, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "relation");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void organizationMap(SparkSession spark, String inputPath, String outputPath) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "organization", eu.dnetlib.dhp.schema.oaf.Organization.class)
|
||||||
|
.map(
|
||||||
|
(MapFunction<eu.dnetlib.dhp.schema.oaf.Organization, Organization>) o -> mapOrganization(o),
|
||||||
|
Encoders.bean(Organization.class))
|
||||||
|
.filter((FilterFunction<Organization>) o -> o != null)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option(COMPRESSION, GZIP)
|
||||||
|
.json(outputPath + "/organization");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization(
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Organization org) {
|
||||||
|
|
||||||
|
Organization organization = new Organization();
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getLegalshortname())
|
||||||
|
.ifPresent(value -> organization.setLegalshortname(value.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getLegalname())
|
||||||
|
.ifPresent(value -> organization.setLegalname(value.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getWebsiteurl())
|
||||||
|
.ifPresent(value -> organization.setWebsiteurl(value.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getAlternativeNames())
|
||||||
|
.ifPresent(
|
||||||
|
value -> organization
|
||||||
|
.setAlternativenames(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(v -> v.getValue())
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getCountry())
|
||||||
|
.ifPresent(
|
||||||
|
value -> {
|
||||||
|
if (!value.getClassid().equals(eu.dnetlib.dhp.oa.graph.dump.complete.Constants.UNKNOWN)) {
|
||||||
|
organization
|
||||||
|
.setCountry(
|
||||||
|
eu.dnetlib.dhp.oa.model.Country.newInstance(value.getClassid(), value.getClassname()));
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getId())
|
||||||
|
.ifPresent(value -> organization.setId(getEntityId(value, ENTITY_ID_SEPARATOR)));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getPid())
|
||||||
|
.ifPresent(
|
||||||
|
value -> organization
|
||||||
|
.setPid(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
|
return organization;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -58,7 +58,12 @@ public class ProjectsSubsetSparkJob implements Serializable {
|
||||||
String projectListPath) {
|
String projectListPath) {
|
||||||
Dataset<String> projectList = spark.read().textFile(projectListPath);
|
Dataset<String> projectList = spark.read().textFile(projectListPath);
|
||||||
Dataset<Project> projects;
|
Dataset<Project> projects;
|
||||||
projects = Utils.readPath(spark, inputPath, Project.class);
|
projects = Utils
|
||||||
|
.readPath(spark, inputPath, Project.class)
|
||||||
|
.map((MapFunction<Project, Project>) p -> {
|
||||||
|
p.setId("40|" + p.getId());
|
||||||
|
return p;
|
||||||
|
}, Encoders.bean(Project.class));
|
||||||
projects
|
projects
|
||||||
.joinWith(projectList, projects.col("id").equalTo(projectList.col("value")), "left")
|
.joinWith(projectList, projects.col("id").equalTo(projectList.col("value")), "left")
|
||||||
.map((MapFunction<Tuple2<Project, String>, Project>) t2 -> {
|
.map((MapFunction<Tuple2<Project, String>, Project>) t2 -> {
|
||||||
|
|
|
@ -0,0 +1,241 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.serafeim;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 04/05/23
|
||||||
|
*/
|
||||||
|
//STEP 2
|
||||||
|
public class SparkSelectResultsAndDumpRelations implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkSelectResultsAndDumpRelations.class);
|
||||||
|
private static String RESULT_COMMUNITY_TABLE = "/result_community";
|
||||||
|
private static String COMMUNITY_RESULT_IDS = "/communityResultIds";
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkSelectResultsAndDumpRelations.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String workingPath = parser.get("workingPath");
|
||||||
|
|
||||||
|
List<String> communityList = null;
|
||||||
|
Optional<String> communities = Optional.ofNullable(parser.get("communities"));
|
||||||
|
if (communities.isPresent()) {
|
||||||
|
communityList = Arrays.asList(communities.get().split(";"));
|
||||||
|
}
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
List<String> finalCommunityList = communityList;
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
run(spark, inputPath, outputPath, workingPath, finalCommunityList);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void run(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
String workingPath,
|
||||||
|
List<String> communityList) {
|
||||||
|
|
||||||
|
// select the result ids related to the set of communities considered
|
||||||
|
writeCommunityRelatedIds(
|
||||||
|
spark, inputPath, Publication.class, communityList, workingPath, "publication");
|
||||||
|
writeCommunityRelatedIds(
|
||||||
|
spark, inputPath, Dataset.class, communityList, workingPath, "dataset");
|
||||||
|
writeCommunityRelatedIds(
|
||||||
|
spark, inputPath, Software.class, communityList, workingPath, "software");
|
||||||
|
writeCommunityRelatedIds(
|
||||||
|
spark, inputPath, OtherResearchProduct.class, communityList,
|
||||||
|
workingPath, "otherresearchproduct");
|
||||||
|
|
||||||
|
// select the relations with semantics cites
|
||||||
|
org.apache.spark.sql.Dataset<Relation> relations = Utils
|
||||||
|
.readPath(spark, inputPath + "/relation", Relation.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||||
|
r.getRelClass().equals(ModelConstants.CITES));
|
||||||
|
|
||||||
|
// select the relations having as source one of the results related to the
|
||||||
|
// communities
|
||||||
|
org.apache.spark.sql.Dataset<String> communityResultIds = spark
|
||||||
|
.read()
|
||||||
|
.textFile(workingPath + COMMUNITY_RESULT_IDS)
|
||||||
|
.distinct();
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/publication", Publication.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<Publication>) p -> !p.getDataInfo().getDeletedbyinference()
|
||||||
|
&& !p.getDataInfo().getInvisible())
|
||||||
|
.map((MapFunction<Publication, String>) p -> p.getId(), Encoders.STRING())
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/dataset", Dataset.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<Dataset>) p -> !p.getDataInfo().getDeletedbyinference()
|
||||||
|
&& !p.getDataInfo().getInvisible())
|
||||||
|
.map((MapFunction<Dataset, String>) p -> p.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/software", Software.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<Software>) p -> !p.getDataInfo().getDeletedbyinference()
|
||||||
|
&& !p.getDataInfo().getInvisible())
|
||||||
|
.map((MapFunction<Software, String>) p -> p.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<OtherResearchProduct>) p -> !p.getDataInfo().getDeletedbyinference()
|
||||||
|
&& !p.getDataInfo().getInvisible())
|
||||||
|
.map((MapFunction<OtherResearchProduct, String>) p -> p.getId(), Encoders.STRING()))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.text(workingPath + "/resultIds");
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<String> resultIds = spark.read().textFile(workingPath + "/resultIds");
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<Relation> oksource = communityResultIds
|
||||||
|
.joinWith(relations, communityResultIds.col("value").equalTo(relations.col("source")))
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<String, Relation>, Relation>) t2 -> t2._2(),
|
||||||
|
Encoders.bean(Relation.class));
|
||||||
|
oksource
|
||||||
|
.joinWith(resultIds, oksource.col("target").equalTo(resultIds.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath + "/relation");
|
||||||
|
|
||||||
|
writeNodes(
|
||||||
|
spark, inputPath + "/publication", Publication.class, outputPath + "/publication",
|
||||||
|
outputPath + "/relation", workingPath);
|
||||||
|
writeNodes(
|
||||||
|
spark, inputPath + "/dataset", Dataset.class, outputPath + "/dataset", outputPath + "/relation",
|
||||||
|
workingPath);
|
||||||
|
writeNodes(
|
||||||
|
spark, inputPath + "/software", Software.class, outputPath + "/software", outputPath + "/relation",
|
||||||
|
workingPath);
|
||||||
|
writeNodes(
|
||||||
|
spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class,
|
||||||
|
outputPath + "/otherresearchproduct", outputPath + "/relation", workingPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> void writeNodes(SparkSession spark, String inputPath, Class<R> clazz,
|
||||||
|
String outputPath, String relationPath, String workingPath) {
|
||||||
|
org.apache.spark.sql.Dataset<Relation> citingRelations = Utils.readPath(spark, relationPath, Relation.class);
|
||||||
|
org.apache.spark.sql.Dataset<R> result = Utils
|
||||||
|
.readPath(spark, inputPath, clazz)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() &&
|
||||||
|
!p.getDataInfo().getInvisible());
|
||||||
|
|
||||||
|
// take the distinct result id for source and target of the relations
|
||||||
|
citingRelations
|
||||||
|
.flatMap(
|
||||||
|
(FlatMapFunction<Relation, String>) r -> Arrays
|
||||||
|
.asList(r.getSource(), r.getTarget())
|
||||||
|
.iterator(),
|
||||||
|
Encoders.STRING())
|
||||||
|
.distinct()
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.text(workingPath + "/relationIds");
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<String> relationIds = spark.read().textFile(workingPath + "/relationIds");
|
||||||
|
|
||||||
|
relationIds
|
||||||
|
.joinWith(result, relationIds.col("value").equalTo(result.col("id")))
|
||||||
|
.map((MapFunction<Tuple2<String, R>, R>) t2 -> t2._2(), Encoders.bean(clazz))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> void writeCommunityRelatedIds(SparkSession spark, String inputPath,
|
||||||
|
Class<R> clazz, List<String> communityList, String outputPath, String resultType) {
|
||||||
|
org.apache.spark.sql.Dataset<R> results = Utils
|
||||||
|
.readPath(spark, inputPath + "/" + resultType, clazz)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() &&
|
||||||
|
!p.getDataInfo().getInvisible() &&
|
||||||
|
isRelatedToCommunities(p, communityList));
|
||||||
|
results
|
||||||
|
.map((MapFunction<R, String>) Result::getId, Encoders.STRING())
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.text(outputPath + COMMUNITY_RESULT_IDS);
|
||||||
|
|
||||||
|
// results
|
||||||
|
// // .repartition(10000)
|
||||||
|
// .write()
|
||||||
|
// .option("compression", "gzip")
|
||||||
|
// .mode(SaveMode.Append)
|
||||||
|
// .json(outputPath + "/" + resultType);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> boolean isRelatedToCommunities(R p, List<String> communityList) {
|
||||||
|
return p
|
||||||
|
.getContext()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
c -> communityList.contains(c.getId()) ||
|
||||||
|
(c.getId().contains("::")
|
||||||
|
&& communityList.contains(c.getId().substring(0, c.getId().indexOf("::")))));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 21/07/22
|
||||||
|
*/
|
||||||
|
public class MasterDuplicate implements Serializable {
|
||||||
|
private String duplicate;
|
||||||
|
private String master;
|
||||||
|
|
||||||
|
public String getDuplicate() {
|
||||||
|
return duplicate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDuplicate(String duplicate) {
|
||||||
|
this.duplicate = duplicate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getMaster() {
|
||||||
|
return master;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaster(String master) {
|
||||||
|
this.master = master;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,97 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.mongodb.DBCursor;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.DbClient;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
|
public class ReadMasterDuplicateFromDB {
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
private static final String QUERY = "SELECT id as master, duplicate FROM dsm_dedup_services; ";
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws Exception {
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
ReadMasterDuplicateFromDB.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/datasourcemaster_parameters.json")));
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String dbUrl = parser.get("postgresUrl");
|
||||||
|
final String dbUser = parser.get("postgresUser");
|
||||||
|
final String dbPassword = parser.get("postgresPassword");
|
||||||
|
final String hdfsPath = parser.get("hdfsPath");
|
||||||
|
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
|
FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
|
execute(dbUrl, dbUser, dbPassword, fsDataOutputStream);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void execute(String dbUrl, String dbUser, String dbPassword, FSDataOutputStream fos) {
|
||||||
|
try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
|
||||||
|
try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8))) {
|
||||||
|
dbClient.processResults(QUERY, rs -> writeMap(datasourceMasterMap(rs), writer));
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static MasterDuplicate datasourceMasterMap(ResultSet rs) {
|
||||||
|
try {
|
||||||
|
MasterDuplicate dm = new MasterDuplicate();
|
||||||
|
String duplicate = rs.getString("duplicate");
|
||||||
|
dm.setDuplicate(OafMapperUtils.createOpenaireId(10, duplicate, true));
|
||||||
|
String master = rs.getString("master");
|
||||||
|
dm.setMaster(OafMapperUtils.createOpenaireId(10, master, true));
|
||||||
|
|
||||||
|
return dm;
|
||||||
|
|
||||||
|
} catch (final SQLException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void writeMap(final MasterDuplicate dm, BufferedWriter writer) {
|
||||||
|
try {
|
||||||
|
writer.write(OBJECT_MAPPER.writeValueAsString(dm));
|
||||||
|
writer.newLine();
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,199 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.Node;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.jayway.jsonpath.DocumentContext;
|
||||||
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolverFactory;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.Param;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.SelectionConstraints;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spark Job that fires the dump for the entities
|
||||||
|
*/
|
||||||
|
public class SparkDumpResult implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkDumpResult.class);
|
||||||
|
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
|
||||||
|
public static final String COMPRESSION = "compression";
|
||||||
|
public static final String GZIP = "gzip";
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkDumpResult.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String resultType = parser.get("resultType");
|
||||||
|
log.info("resultType: {}", resultType);
|
||||||
|
|
||||||
|
final String resultClassName = parser.get("resultTableName");
|
||||||
|
log.info("resultTableName: {}", resultClassName);
|
||||||
|
|
||||||
|
Optional<String> pathString = Optional.ofNullable(parser.get("pathMap"));
|
||||||
|
HashMap<String, String> pathMap = null;
|
||||||
|
if (pathString.isPresent()) {
|
||||||
|
pathMap = new Gson().fromJson(parser.get("pathMap"), HashMap.class);
|
||||||
|
log.info("pathMap: {}", new Gson().toJson(pathMap));
|
||||||
|
}
|
||||||
|
|
||||||
|
final Optional<String> parameter = Optional.ofNullable(parser.get("selectionCriteria"));
|
||||||
|
SelectionConstraints selectionConstraints = null;
|
||||||
|
if (parameter.isPresent()) {
|
||||||
|
selectionConstraints = new ObjectMapper().readValue(parameter.get(), SelectionConstraints.class);
|
||||||
|
selectionConstraints.addResolver(resolver);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz = (Class<? extends eu.dnetlib.dhp.schema.oaf.Result>) Class
|
||||||
|
.forName(resultClassName);
|
||||||
|
|
||||||
|
run(
|
||||||
|
isSparkSessionManaged, inputPath, outputPath, pathMap, selectionConstraints, inputClazz,
|
||||||
|
resultType);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath,
|
||||||
|
HashMap<String, String> pathMap, SelectionConstraints selectionConstraints,
|
||||||
|
Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz, String resultType) {
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
HashMap<String, String> finalPathMap = pathMap;
|
||||||
|
SelectionConstraints finalSelectionConstraints = selectionConstraints;
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath + "/original/" + resultType);
|
||||||
|
Utils.removeOutputDir(spark, outputPath + "/dump/" + resultType);
|
||||||
|
resultDump(
|
||||||
|
spark, inputPath, outputPath, inputClazz, finalPathMap,
|
||||||
|
finalSelectionConstraints, resultType);
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <I extends eu.dnetlib.dhp.schema.oaf.Result> void resultDump(
|
||||||
|
SparkSession spark,
|
||||||
|
String inputPath,
|
||||||
|
String outputPath,
|
||||||
|
Class<I> inputClazz,
|
||||||
|
Map<String, String> pathMap,
|
||||||
|
SelectionConstraints selectionConstraints,
|
||||||
|
String resultType) {
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.map(
|
||||||
|
(MapFunction<I, I>) value -> filterResult(
|
||||||
|
value, pathMap, selectionConstraints, inputClazz, resultType),
|
||||||
|
Encoders.bean(inputClazz))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/original/" + resultType);
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, outputPath + "/original/" + resultType, inputClazz)
|
||||||
|
.map(
|
||||||
|
(MapFunction<I, GraphResult>) value -> (GraphResult) ResultMapper
|
||||||
|
.map(
|
||||||
|
value, null,
|
||||||
|
Constants.DUMPTYPE.COMPLETE.getType()),
|
||||||
|
Encoders.bean(GraphResult.class))
|
||||||
|
.map((MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option(COMPRESSION, GZIP)
|
||||||
|
.text(outputPath + "/dump/" + resultType);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <I extends eu.dnetlib.dhp.schema.oaf.Result> I filterResult(I value, Map<String, String> pathMap,
|
||||||
|
SelectionConstraints selectionConstraints, Class<I> inputClazz,
|
||||||
|
String resultType) {
|
||||||
|
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
|
||||||
|
|
||||||
|
if (Boolean.FALSE.equals(odInfo.isPresent())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (Boolean.TRUE.equals(odInfo.get().getDeletedbyinference())
|
||||||
|
|| Boolean.TRUE.equals(odInfo.get().getInvisible())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isCompatible(value.getResulttype().getClassid(), resultType)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (selectionConstraints != null) {
|
||||||
|
Param param = new Param();
|
||||||
|
String json = new Gson().toJson(value, inputClazz);
|
||||||
|
DocumentContext jsonContext = JsonPath.parse(json);
|
||||||
|
|
||||||
|
for (String key : pathMap.keySet()) {
|
||||||
|
try {
|
||||||
|
param.insert(key, jsonContext.read(pathMap.get(key)));
|
||||||
|
} catch (com.jayway.jsonpath.PathNotFoundException e) {
|
||||||
|
param.insert(key, new ArrayList<>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!selectionConstraints.verifyCriteria(param)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isCompatible(String classid, String resultType) {
|
||||||
|
return (classid.equals(resultType) || (classid.equals("other") && resultType.equals("otherresearchproduct")));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,329 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import javax.print.attribute.standard.MediaSize;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.*;
|
||||||
|
import org.apache.spark.sql.*;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
import scala.Function1;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 11/11/22
|
||||||
|
*/
|
||||||
|
public class SparkSelectSubset implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkSelectSubset.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkSelectSubset.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
Optional<String> rs = Optional.ofNullable(parser.get("removeSet"));
|
||||||
|
final Set<String> removeSet = new HashSet<>();
|
||||||
|
if (rs.isPresent()) {
|
||||||
|
Collections.addAll(removeSet, rs.get().split(";"));
|
||||||
|
}
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
selectSubset(spark, inputPath, outputPath, removeSet);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void selectSubset(SparkSession spark, String inputPath, String outputPath, Set<String> removeSet) {
|
||||||
|
Dataset<Relation> relation = Utils
|
||||||
|
.readPath(spark, inputPath + "/relation", Relation.class)
|
||||||
|
.filter(
|
||||||
|
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference()
|
||||||
|
&& !removeSet.contains(r.getRelClass()) &&
|
||||||
|
!r.getRelType().equals("resultService"));
|
||||||
|
|
||||||
|
Dataset<String> resultIds = Utils
|
||||||
|
.readPath(spark, outputPath + "/original/publication", Publication.class)
|
||||||
|
|
||||||
|
.map((MapFunction<Publication, String>) p -> p.getId(), Encoders.STRING())
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, outputPath + "/original/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class)
|
||||||
|
|
||||||
|
.map((MapFunction<eu.dnetlib.dhp.schema.oaf.Dataset, String>) d -> d.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, outputPath + "/original/software", Software.class)
|
||||||
|
|
||||||
|
.map((MapFunction<Software, String>) s -> s.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, outputPath + "/original/otherresearchproduct", OtherResearchProduct.class)
|
||||||
|
|
||||||
|
.map((MapFunction<OtherResearchProduct, String>) o -> o.getId(), Encoders.STRING()));
|
||||||
|
|
||||||
|
// select result -> result relations
|
||||||
|
Dataset<Relation> relResultResult = relation
|
||||||
|
.joinWith(resultIds, relation.col("source").equalTo(resultIds.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
|
||||||
|
|
||||||
|
relResultResult
|
||||||
|
.joinWith(resultIds, relResultResult.col("target").equalTo(resultIds.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath + "/original/relation");
|
||||||
|
|
||||||
|
// save the relations among other entities and the results
|
||||||
|
Dataset<String> otherIds = Utils
|
||||||
|
.readPath(spark, inputPath + "/organization", Organization.class)
|
||||||
|
.filter((FilterFunction<Organization>) e -> !e.getDataInfo().getDeletedbyinference())
|
||||||
|
.map((MapFunction<Organization, String>) o -> o.getId(), Encoders.STRING())
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/project", Project.class)
|
||||||
|
.filter((FilterFunction<Project>) e -> !e.getDataInfo().getDeletedbyinference())
|
||||||
|
.map((MapFunction<Project, String>) p -> p.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/datasource", Datasource.class)
|
||||||
|
.filter((FilterFunction<Datasource>) e -> !e.getDataInfo().getDeletedbyinference())
|
||||||
|
.map((MapFunction<Datasource, String>) d -> d.getId(), Encoders.STRING()));
|
||||||
|
|
||||||
|
Dataset<Relation> relResultOther = relation
|
||||||
|
.joinWith(resultIds, relation.col("source").equalTo(resultIds.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
|
||||||
|
|
||||||
|
relResultOther
|
||||||
|
.joinWith(otherIds, relResultOther.col("target").equalTo(otherIds.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/original/relation");
|
||||||
|
|
||||||
|
Dataset<Relation> relOtherResult = relation
|
||||||
|
.joinWith(resultIds, relation.col("target").equalTo(resultIds.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
|
||||||
|
|
||||||
|
relOtherResult
|
||||||
|
.joinWith(otherIds, relOtherResult.col("source").equalTo(otherIds.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/original/relation");
|
||||||
|
|
||||||
|
Dataset<String> relAll = Utils
|
||||||
|
.readPath(spark, outputPath + "/original/relation", Relation.class)
|
||||||
|
.flatMap(
|
||||||
|
(FlatMapFunction<Relation, String>) r -> Arrays.asList(r.getSource(), r.getTarget()).iterator(),
|
||||||
|
Encoders.STRING())
|
||||||
|
.distinct();
|
||||||
|
|
||||||
|
// Save the entities in relations with at least one result
|
||||||
|
Dataset<Organization> organization = Utils
|
||||||
|
.readPath(spark, inputPath + "/organization", Organization.class)
|
||||||
|
.filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference());
|
||||||
|
organization
|
||||||
|
.joinWith(relAll, organization.col("id").equalTo(relAll.col("value")))
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<Organization, String>, Organization>) t2 -> t2._1(),
|
||||||
|
Encoders.bean(Organization.class))
|
||||||
|
.groupByKey((MapFunction<Organization, String>) v -> v.getId(), Encoders.STRING())
|
||||||
|
.mapGroups(
|
||||||
|
(MapGroupsFunction<String, Organization, Organization>) (k, it) -> it.next(),
|
||||||
|
Encoders.bean(Organization.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/original/organization");
|
||||||
|
|
||||||
|
Dataset<Datasource> datasource = Utils
|
||||||
|
.readPath(spark, inputPath + "/datasource", Datasource.class)
|
||||||
|
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference());
|
||||||
|
datasource
|
||||||
|
.joinWith(relAll, datasource.col("id").equalTo(relAll.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Datasource, String>, Datasource>) t2 -> t2._1(), Encoders.bean(Datasource.class))
|
||||||
|
.groupByKey((MapFunction<Datasource, String>) v -> v.getId(), Encoders.STRING())
|
||||||
|
.mapGroups(
|
||||||
|
(MapGroupsFunction<String, Datasource, Datasource>) (k, it) -> it.next(),
|
||||||
|
Encoders.bean(Datasource.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/original/datasource");
|
||||||
|
|
||||||
|
// plus we need to dump all the datasource in collectedfrom hostedby
|
||||||
|
Dataset<String> cfhb_orig = Utils
|
||||||
|
.readPath(spark, outputPath + "/original/publication", Publication.class)
|
||||||
|
.flatMap(
|
||||||
|
(FlatMapFunction<Publication, String>) p -> {
|
||||||
|
List<String> ret = new ArrayList<>();
|
||||||
|
p.getInstance().stream().forEach(i -> {
|
||||||
|
if (Optional.ofNullable(i.getHostedby()).isPresent()
|
||||||
|
&& Optional.ofNullable(i.getHostedby().getKey()).isPresent())
|
||||||
|
ret.add(i.getHostedby().getKey());
|
||||||
|
});
|
||||||
|
if (Optional.ofNullable(p.getCollectedfrom()).isPresent()) {
|
||||||
|
p.getCollectedfrom().stream().forEach(cf -> {
|
||||||
|
if (Optional.ofNullable(cf.getKey()).isPresent())
|
||||||
|
ret.add(cf.getKey());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return ret.iterator();
|
||||||
|
}, Encoders.STRING())
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, outputPath + "/original/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class)
|
||||||
|
.flatMap(
|
||||||
|
(FlatMapFunction<eu.dnetlib.dhp.schema.oaf.Dataset, String>) p -> {
|
||||||
|
List<String> ret = new ArrayList<>();
|
||||||
|
p.getInstance().stream().forEach(i -> {
|
||||||
|
if (Optional.ofNullable(i.getHostedby()).isPresent()
|
||||||
|
&& Optional.ofNullable(i.getHostedby().getKey()).isPresent())
|
||||||
|
ret.add(i.getHostedby().getKey());
|
||||||
|
});
|
||||||
|
if (Optional.ofNullable(p.getCollectedfrom()).isPresent()) {
|
||||||
|
p.getCollectedfrom().stream().forEach(cf -> {
|
||||||
|
if (Optional.ofNullable(cf.getKey()).isPresent())
|
||||||
|
ret.add(cf.getKey());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return ret.iterator();
|
||||||
|
}, Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, outputPath + "/original/software", Software.class)
|
||||||
|
.flatMap(
|
||||||
|
(FlatMapFunction<Software, String>) p -> {
|
||||||
|
List<String> ret = new ArrayList<>();
|
||||||
|
p.getInstance().stream().forEach(i -> {
|
||||||
|
if (Optional.ofNullable(i.getHostedby()).isPresent()
|
||||||
|
&& Optional.ofNullable(i.getHostedby().getKey()).isPresent())
|
||||||
|
ret.add(i.getHostedby().getKey());
|
||||||
|
});
|
||||||
|
if (Optional.ofNullable(p.getCollectedfrom()).isPresent()) {
|
||||||
|
p.getCollectedfrom().stream().forEach(cf -> {
|
||||||
|
if (Optional.ofNullable(cf.getKey()).isPresent())
|
||||||
|
ret.add(cf.getKey());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return ret.iterator();
|
||||||
|
}, Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, outputPath + "/original/otherresearchproduct", OtherResearchProduct.class)
|
||||||
|
.flatMap(
|
||||||
|
(FlatMapFunction<OtherResearchProduct, String>) p -> {
|
||||||
|
List<String> ret = new ArrayList<>();
|
||||||
|
p.getInstance().stream().forEach(i -> {
|
||||||
|
if (Optional.ofNullable(i.getHostedby()).isPresent()
|
||||||
|
&& Optional.ofNullable(i.getHostedby().getKey()).isPresent())
|
||||||
|
ret.add(i.getHostedby().getKey());
|
||||||
|
});
|
||||||
|
if (Optional.ofNullable(p.getCollectedfrom()).isPresent()) {
|
||||||
|
p.getCollectedfrom().stream().forEach(cf -> {
|
||||||
|
if (Optional.ofNullable(cf.getKey()).isPresent())
|
||||||
|
ret.add(cf.getKey());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return ret.iterator();
|
||||||
|
}, Encoders.STRING()))
|
||||||
|
.filter((FilterFunction<String>) s -> !s.equals(ModelConstants.UNKNOWN_REPOSITORY.getKey()))
|
||||||
|
.distinct();
|
||||||
|
|
||||||
|
datasource
|
||||||
|
.joinWith(cfhb_orig, datasource.col("id").equalTo(cfhb_orig.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Datasource, String>, Datasource>) t2 -> t2._1(), Encoders.bean(Datasource.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/original/datasource");
|
||||||
|
|
||||||
|
Dataset<Project> project = Utils
|
||||||
|
.readPath(spark, inputPath + "/project", Project.class)
|
||||||
|
.filter((FilterFunction<Project>) d -> !d.getDataInfo().getDeletedbyinference());
|
||||||
|
project
|
||||||
|
.joinWith(relAll, project.col("id").equalTo(relAll.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Project, String>, Project>) t2 -> t2._1(), Encoders.bean(Project.class))
|
||||||
|
.groupByKey((MapFunction<Project, String>) v -> v.getId(), Encoders.STRING())
|
||||||
|
.mapGroups((MapGroupsFunction<String, Project, Project>) (k, it) -> it.next(), Encoders.bean(Project.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/original/project");
|
||||||
|
|
||||||
|
// save the relations among entities different from the result
|
||||||
|
|
||||||
|
Dataset<String> selectedIDs = Utils
|
||||||
|
.readPath(spark, outputPath + "/original/project", Project.class)
|
||||||
|
.map((MapFunction<Project, String>) p -> p.getId(), Encoders.STRING())
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, outputPath + "/original/organization", Organization.class)
|
||||||
|
.map((MapFunction<Organization, String>) o -> o.getId(), Encoders.STRING()))
|
||||||
|
.union(
|
||||||
|
Utils
|
||||||
|
.readPath(spark, outputPath + "/original/datasource", Datasource.class)
|
||||||
|
.map((MapFunction<Datasource, String>) d -> d.getId(), Encoders.STRING()));
|
||||||
|
|
||||||
|
Dataset<Relation> relOtherOther = relation
|
||||||
|
.joinWith(selectedIDs, relation.col("source").equalTo(selectedIDs.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
|
||||||
|
|
||||||
|
relOtherOther
|
||||||
|
.joinWith(selectedIDs, relOtherOther.col("target").equalTo(selectedIDs.col("value")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/original/relation");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,133 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
|
import org.apache.spark.api.java.function.ForeachFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 15/11/22
|
||||||
|
*/
|
||||||
|
public class SparkSelectValidContext implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkSelectValidContext.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkSelectValidContext.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_select_context.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String contextPath = parser.get("contextPath");
|
||||||
|
log.info("contextPath: {}", contextPath);
|
||||||
|
|
||||||
|
final String communityMapPath = parser.get("communityMapPath");
|
||||||
|
log.info("communityMapPath: {}", communityMapPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
selectValidContext(spark, inputPath, contextPath, communityMapPath, outputPath);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void selectValidContext(SparkSession spark, String inputPath, String contextPath,
|
||||||
|
String communityMapPath, String outputPath) {
|
||||||
|
List<String> keys = Arrays
|
||||||
|
.asList(Utils.getCommunityMap(spark, communityMapPath).keySet().stream().toArray(String[]::new));
|
||||||
|
Dataset<String> context = getFilter(spark, inputPath + "/publication", keys, Publication.class)
|
||||||
|
.union(getFilter(spark, inputPath + "/dataset", keys, eu.dnetlib.dhp.schema.oaf.Dataset.class))
|
||||||
|
.union(getFilter(spark, inputPath + "/software", keys, Software.class))
|
||||||
|
.union(getFilter(spark, inputPath + "/otherresearchproduct", keys, OtherResearchProduct.class))
|
||||||
|
.distinct();
|
||||||
|
|
||||||
|
context.foreach((ForeachFunction<String>) c -> System.out.println(c));
|
||||||
|
|
||||||
|
Dataset<ResearchCommunity> researchCommunity = Utils.readPath(spark, contextPath, ResearchCommunity.class);
|
||||||
|
|
||||||
|
researchCommunity
|
||||||
|
.joinWith(context, researchCommunity.col("acronym").equalTo(context.col("value")))
|
||||||
|
.map(
|
||||||
|
(MapFunction<Tuple2<ResearchCommunity, String>, ResearchCommunity>) t2 -> t2._1(),
|
||||||
|
Encoders.bean(ResearchCommunity.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <I extends Result> Dataset<String> getFilter(SparkSession spark, String inputPath,
|
||||||
|
List<String> keys, Class<I> inputClazz) {
|
||||||
|
|
||||||
|
return Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.filter((FilterFunction<I>) r -> isPresentContext(r))
|
||||||
|
.flatMap(
|
||||||
|
(FlatMapFunction<I, String>) r -> r
|
||||||
|
.getContext()
|
||||||
|
.stream()
|
||||||
|
.map(c -> extract(c.getId(), keys))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.iterator(),
|
||||||
|
Encoders.STRING())
|
||||||
|
.filter(Objects::nonNull);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <I extends Result> boolean isPresentContext(I r) {
|
||||||
|
return Optional.ofNullable(r.getContext()).isPresent();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String extract(String c, List<String> keySet) {
|
||||||
|
if (keySet.contains(c))
|
||||||
|
return c;
|
||||||
|
if (c.contains(":") && keySet.contains(c.substring(0, c.indexOf(":"))))
|
||||||
|
return c.substring(0, c.indexOf(":"));
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,116 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntitiesId;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getValidRelations;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import scala.Tuple2;

/**
 * Spark job that filters the full relation set, keeping only the relations whose source and
 * target ids belong to the dumped entities, and writes them under {@code <sourcePath>/relation}.
 *
 * @author miriam.baglioni
 * @Date 15/11/22
 */
public class SparkSelectValidRelation implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelation.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkSelectValidRelation.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		// results dumped
		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		// all relations plus those produced via context and extracted from results
		final String relationPath = parser.get("relationPath");
		log.info("relationPath: {}", relationPath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> selectValidRelation(spark, inputPath, relationPath));
	}

	/**
	 * Keeps the relations whose endpoints were both dumped and writes them (gzip JSON,
	 * overwriting) under {@code inputPath + "/relation"}.
	 *
	 * @param spark        the active spark session
	 * @param inputPath    base path of the dumped entities; also hosts the output directory
	 * @param relationPath path of the candidate relations
	 */
	private static void selectValidRelation(SparkSession spark, String inputPath,
		String relationPath) {
		// join the candidate relations against the ids of the dumped entities; the actual
		// source/target validation is delegated to Utils.getValidRelations
		getValidRelations(
			Utils.readPath(spark, relationPath, Relation.class),
			getEntitiesId(spark, inputPath))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(inputPath + "/relation");
	}

}
|
|
@ -0,0 +1,153 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import scala.Tuple2;

/**
 * Spark job that appends to {@code <sourcePath>/relation} the context relations whose
 * endpoints are valid: one endpoint must be a dumped entity id and the other an allowed
 * community/infrastructure id. Relations are read from both {@code <contextRelationPath>/context}
 * and {@code <contextRelationPath>/contextOrg}.
 *
 * @author miriam.baglioni
 * @Date 15/11/22
 */
public class SparkSelectValidRelationContext implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationContext.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkSelectValidRelationContext.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_context_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		// results dumped
		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		// relations produced via the context entities (communities/infrastructures)
		final String contextRelationPath = parser.get("contextRelationPath");
		log.info("contextRelationPath: {}", contextRelationPath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				selectValidRelation(spark, inputPath, contextRelationPath);

			});

	}

	/**
	 * Appends the valid context relations (both directions: entity->context and
	 * context->entity) to {@code inputPath + "/relation"} as gzip JSON. NOTE(review): both
	 * writes use SaveMode.Append on the same path, so this job must run after the job that
	 * produced the base relation dump.
	 *
	 * @param spark               the active spark session
	 * @param inputPath           base path of the dumped entities; also hosts the output directory
	 * @param contextRelationPath base path of the context relations ("/context" and "/contextOrg")
	 */
	private static void selectValidRelation(SparkSession spark, String inputPath,
		String contextRelationPath) {
		// read the results
		// ids of every dumped entity (results, organizations, projects, datasources)
		Dataset<String> dumpedIds = Utils
			.readPath(spark, inputPath + "/publication", GraphResult.class)
			.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING())
			.union(
				Utils
					.readPath(spark, inputPath + "/dataset", GraphResult.class)
					.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
			.union(
				Utils
					.readPath(spark, inputPath + "/software", GraphResult.class)
					.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
			.union(
				Utils
					.readPath(spark, inputPath + "/otherresearchproduct", GraphResult.class)
					.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
			.union(
				Utils
					.readPath(spark, inputPath + "/organization", eu.dnetlib.dhp.oa.model.graph.Organization.class)
					.map(
						(MapFunction<eu.dnetlib.dhp.oa.model.graph.Organization, String>) o -> o.getId(),
						Encoders.STRING()))
			.union(
				Utils
					.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.oa.model.graph.Project.class)
					.map(
						(MapFunction<eu.dnetlib.dhp.oa.model.graph.Project, String>) o -> o.getId(), Encoders.STRING()))
			.union(
				Utils
					.readPath(spark, inputPath + "/datasource", eu.dnetlib.dhp.oa.model.graph.Datasource.class)
					.map(
						(MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) o -> o.getId(),
						Encoders.STRING()));

		// context relations keyed by their source id
		Dataset<Tuple2<String, Relation>> relationSource = Utils
			.readPath(spark, contextRelationPath + "/context", Relation.class)
			.union(Utils.readPath(spark, contextRelationPath + "/contextOrg", Relation.class))
			.map(
				(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getSource(), r),
				Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));

		// the communities/infrastructures included in this dump
		Dataset<ResearchCommunity> allowedContext = Utils
			.readPath(spark, inputPath + "/communities_infrastructures", ResearchCommunity.class);

		// pass 1: relations whose SOURCE is a dumped entity, re-keyed by their target id
		Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
			.joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))
			.map(
				(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
					t2._1()._2().getTarget(), t2._1()._2()),
				Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));

		// ... of those, keep the ones whose TARGET is an allowed context and append them
		relJoinSource
			.joinWith(allowedContext, relJoinSource.col("_1").equalTo(allowedContext.col("id")))
			.map(
				(MapFunction<Tuple2<Tuple2<String, Relation>, ResearchCommunity>, Relation>) t2 -> t2._1()._2(),
				Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
			.write()
			.mode(SaveMode.Append)
			.option("compression", "gzip")
			.json(inputPath + "/relation");

		// pass 2: relations whose SOURCE is an allowed context, re-keyed by their target id
		relJoinSource = relationSource
			.joinWith(allowedContext, relationSource.col("_1").equalTo(allowedContext.col("id")))
			.map(
				(MapFunction<Tuple2<Tuple2<String, Relation>, ResearchCommunity>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
					t2._1()._2().getTarget(), t2._1()._2()),
				Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));

		// ... of those, keep the ones whose TARGET is a dumped entity and append them
		relJoinSource
			.joinWith(dumpedIds, relJoinSource.col("_1").equalTo(dumpedIds.col("value")))
			.map(
				(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
				Encoders.bean(Relation.class))
			.write()
			.mode(SaveMode.Append)
			.option("compression", "gzip")
			.json(inputPath + "/relation");

	}

}
|
|
@ -0,0 +1,30 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;

import java.io.Serializable;

/**
 * Selection verb "contains": matches when the tested value contains the configured
 * parameter as a substring (case sensitive).
 */
@VerbClass("contains")
public class ContainsVerb implements Selection, Serializable {

	// the substring to look for
	private String param;

	public ContainsVerb() {
	}

	public ContainsVerb(final String param) {
		this.param = param;
	}

	public String getParam() {
		return param;
	}

	public void setParam(String param) {
		this.param = param;
	}

	@Override
	public boolean apply(String value) {
		return value.contains(param);
	}
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;

import java.io.Serializable;

/**
 * Selection verb "contains_ignorecase": matches when the tested value contains the
 * configured parameter as a substring, comparing case-insensitively (both operands are
 * lower-cased with the default locale).
 */
@VerbClass("contains_ignorecase")
public class ContainsVerbIgnoreCase implements Selection, Serializable {

	// the substring to look for (case is ignored at match time)
	private String param;

	public ContainsVerbIgnoreCase() {
	}

	public ContainsVerbIgnoreCase(final String param) {
		this.param = param;
	}

	public String getParam() {
		return param;
	}

	public void setParam(String param) {
		this.param = param;
	}

	@Override
	public boolean apply(String value) {
		return value.toLowerCase().contains(param.toLowerCase());
	}
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;

import java.io.Serializable;

/**
 * Selection verb "equals": matches when the tested value equals the configured parameter
 * (case sensitive).
 */
@VerbClass("equals")
public class EqualVerb implements Selection, Serializable {

	// the value to compare against
	private String param;

	public EqualVerb() {
	}

	public EqualVerb(final String param) {
		this.param = param;
	}

	public String getParam() {
		return param;
	}

	public void setParam(String param) {
		this.param = param;
	}

	@Override
	public boolean apply(String value) {
		return value.equals(param);
	}
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;

import java.io.Serializable;

/**
 * Selection verb "equals_ignorecase": matches when the tested value equals the configured
 * parameter ignoring case.
 */
@VerbClass("equals_ignorecase")
public class EqualVerbIgnoreCase implements Selection, Serializable {

	// the value to compare against (case is ignored at match time)
	private String param;

	public EqualVerbIgnoreCase() {
	}

	public EqualVerbIgnoreCase(final String param) {
		this.param = param;
	}

	public String getParam() {
		return param;
	}

	public void setParam(String param) {
		this.param = param;
	}

	@Override
	public boolean apply(String value) {
		return value.equalsIgnoreCase(param);
	}
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;

import java.io.Serializable;

/**
 * Selection verb "greater_than": matches when the tested value is lexicographically
 * greater than the configured parameter (String.compareTo — NOTE(review): not numeric;
 * callers presumably pass comparable string encodings, e.g. ISO dates).
 *
 * @author miriam.baglioni
 * @Date 11/11/22
 */
@VerbClass("greater_than")
public class GreatThanVerb implements Selection, Serializable {

	// the lower bound (exclusive)
	private String param;

	public GreatThanVerb() {
	}

	public GreatThanVerb(final String param) {
		this.param = param;
	}

	public String getParam() {
		return param;
	}

	public void setParam(String param) {
		this.param = param;
	}

	@Override
	public boolean apply(String value) {
		return value.compareTo(param) > 0;
	}
}
|
|
@ -0,0 +1,43 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;

import java.lang.reflect.Type;

import com.google.gson.*;

/**
 * Gson adapter that (de)serializes instances of an interface by recording the concrete
 * class name ("CLASSNAME") alongside the serialized payload ("DATA"), so that the proper
 * subtype can be re-instantiated on deserialization.
 */
public class InterfaceAdapter implements JsonSerializer<Object>, JsonDeserializer<Object> {

	private static final String CLASSNAME = "CLASSNAME";
	private static final String DATA = "DATA";

	/**
	 * Deserializes {@code jsonElement} into the concrete class named by its CLASSNAME field.
	 *
	 * @throws JsonParseException when the named class cannot be resolved
	 */
	@Override
	public Object deserialize(
		JsonElement jsonElement,
		Type type,
		JsonDeserializationContext jsonDeserializationContext)
		throws JsonParseException {

		JsonObject jsonObject = jsonElement.getAsJsonObject();
		JsonPrimitive prim = (JsonPrimitive) jsonObject.get(CLASSNAME);
		String className = prim.getAsString();
		Class klass = getObjectClass(className);
		return jsonDeserializationContext.deserialize(jsonObject.get(DATA), klass);
	}

	/** Serializes the object wrapping it with its concrete class name. */
	@Override
	public JsonElement serialize(
		Object jsonElement, Type type, JsonSerializationContext jsonSerializationContext) {
		JsonObject jsonObject = new JsonObject();
		jsonObject.addProperty(CLASSNAME, jsonElement.getClass().getName());
		jsonObject.add(DATA, jsonSerializationContext.serialize(jsonElement));
		return jsonObject;
	}

	/** **** Helper method to get the className of the object to be deserialized **** */
	public Class getObjectClass(String className) {
		try {
			return Class.forName(className);
		} catch (ClassNotFoundException e) {
			// preserve the original exception as cause instead of flattening it to a message
			throw new JsonParseException(e);
		}
	}
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;

import java.io.Serializable;

/**
 * Selection verb "lesser_than": matches when the tested value is lexicographically
 * smaller than the configured parameter (String.compareTo — NOTE(review): not numeric;
 * callers presumably pass comparable string encodings, e.g. ISO dates).
 *
 * @author miriam.baglioni
 * @Date 11/11/22
 */
@VerbClass("lesser_than")
public class LessThanVerb implements Selection, Serializable {

	// the upper bound (exclusive)
	private String param;

	public LessThanVerb() {
	}

	public LessThanVerb(final String param) {
		this.param = param;
	}

	public String getParam() {
		return param;
	}

	public void setParam(String param) {
		this.param = param;
	}

	@Override
	public boolean apply(String value) {
		return value.compareTo(param) < 0;
	}
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue