forked from D-Net/dnet-hadoop
Merge branch 'dump' of https://code-repo.d4science.org/miriam.baglioni/dnet-hadoop into resolve_conflicts_pr40_dump
This commit is contained in:
commit
5b994d7ccf
|
@ -87,6 +87,11 @@
|
||||||
<groupId>org.postgresql</groupId>
|
<groupId>org.postgresql</groupId>
|
||||||
<artifactId>postgresql</artifactId>
|
<artifactId>postgresql</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.squareup.okhttp3</groupId>
|
||||||
|
<artifactId>okhttp</artifactId>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import okhttp3.MediaType;
|
||||||
|
import okhttp3.RequestBody;
|
||||||
|
import okhttp3.internal.Util;
|
||||||
|
import okio.BufferedSink;
|
||||||
|
import okio.Okio;
|
||||||
|
import okio.Source;
|
||||||
|
|
||||||
|
public class InputStreamRequestBody extends RequestBody {
|
||||||
|
|
||||||
|
private InputStream inputStream;
|
||||||
|
private MediaType mediaType;
|
||||||
|
private long lenght;
|
||||||
|
|
||||||
|
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
|
||||||
|
|
||||||
|
return new InputStreamRequestBody(inputStream, mediaType, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
|
||||||
|
this.inputStream = inputStream;
|
||||||
|
this.mediaType = mediaType;
|
||||||
|
this.lenght = len;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MediaType contentType() {
|
||||||
|
return mediaType;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long contentLength() {
|
||||||
|
|
||||||
|
return lenght;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeTo(BufferedSink sink) throws IOException {
|
||||||
|
Source source = null;
|
||||||
|
try {
|
||||||
|
source = Okio.source(inputStream);
|
||||||
|
sink.writeAll(source);
|
||||||
|
} finally {
|
||||||
|
Util.closeQuietly(source);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
package eu.dnetlib.dhp.common.api;
|
||||||
|
|
||||||
|
/**
 * Checked exception thrown when the requested concept record id cannot be found among the depositions.
 * <p>
 * It extends {@link Exception} rather than {@link Throwable} directly, so that generic
 * {@code catch (Exception e)} handlers also see it.
 */
public class MissingConceptDoiException extends Exception {

	/**
	 * @param message description of the missing concept record id
	 */
	public MissingConceptDoiException(String message) {
		super(message);
	}
}
|
|
@ -0,0 +1,266 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
||||||
|
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
|
||||||
|
import okhttp3.*;
|
||||||
|
|
||||||
|
public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
|
|
||||||
|
String urlString;
|
||||||
|
String bucket;
|
||||||
|
|
||||||
|
String deposition_id;
|
||||||
|
String access_token;
|
||||||
|
|
||||||
|
|
||||||
|
public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
|
||||||
|
|
||||||
|
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
|
||||||
|
|
||||||
|
public String getUrlString() {
|
||||||
|
return urlString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUrlString(String urlString) {
|
||||||
|
this.urlString = urlString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getBucket() {
|
||||||
|
return bucket;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBucket(String bucket) {
|
||||||
|
this.bucket = bucket;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDeposition_id(String deposition_id){this.deposition_id = deposition_id;}
|
||||||
|
|
||||||
|
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
|
||||||
|
|
||||||
|
this.urlString = urlString;
|
||||||
|
this.access_token = access_token;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
|
||||||
|
* @return response code
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public int newDeposition() throws IOException {
|
||||||
|
String json = "{}";
|
||||||
|
OkHttpClient httpClient = new OkHttpClient();
|
||||||
|
|
||||||
|
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, json);
|
||||||
|
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(urlString)
|
||||||
|
.addHeader("Content-Type", "application/json") // add request headers
|
||||||
|
.addHeader("Authorization", "Bearer " + access_token)
|
||||||
|
.post(body)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
try (Response response = httpClient.newCall(request).execute()) {
|
||||||
|
|
||||||
|
if (!response.isSuccessful())
|
||||||
|
throw new IOException("Unexpected code " + response + response.body().string());
|
||||||
|
|
||||||
|
// Get response body
|
||||||
|
json = response.body().string();
|
||||||
|
|
||||||
|
ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class);
|
||||||
|
this.bucket = newSubmission.getLinks().getBucket();
|
||||||
|
this.deposition_id = newSubmission.getId();
|
||||||
|
|
||||||
|
return response.code();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Upload files in Zenodo.
|
||||||
|
* @param is the inputStream for the file to upload
|
||||||
|
* @param file_name the name of the file as it will appear on Zenodo
|
||||||
|
* @param len the size of the file
|
||||||
|
* @return the response code
|
||||||
|
*/
|
||||||
|
public int uploadIS(InputStream is, String file_name, long len) throws IOException {
|
||||||
|
OkHttpClient httpClient = new OkHttpClient();
|
||||||
|
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(bucket + "/" + file_name)
|
||||||
|
.addHeader("Content-Type", "application/zip") // add request headers
|
||||||
|
.addHeader("Authorization", "Bearer " + access_token)
|
||||||
|
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
try (Response response = httpClient.newCall(request).execute()) {
|
||||||
|
if (!response.isSuccessful())
|
||||||
|
throw new IOException("Unexpected code " + response + response.body().string());
|
||||||
|
return response.code();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Associates metadata information to the current deposition
|
||||||
|
* @param metadata the metadata
|
||||||
|
* @return response code
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public int sendMretadata(String metadata) throws IOException {
|
||||||
|
|
||||||
|
OkHttpClient httpClient = new OkHttpClient();
|
||||||
|
|
||||||
|
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, metadata);
|
||||||
|
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(urlString + "/" + deposition_id)
|
||||||
|
.addHeader("Content-Type", "application/json") // add request headers
|
||||||
|
.addHeader("Authorization", "Bearer " + access_token)
|
||||||
|
.put(body)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
try (Response response = httpClient.newCall(request).execute()) {
|
||||||
|
|
||||||
|
if (!response.isSuccessful())
|
||||||
|
throw new IOException("Unexpected code " + response + response.body().string());
|
||||||
|
|
||||||
|
return response.code();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To publish the current deposition. It works for both new deposition or new version of an old deposition
|
||||||
|
* @return response code
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public int publish() throws IOException {
|
||||||
|
|
||||||
|
String json = "{}";
|
||||||
|
|
||||||
|
OkHttpClient httpClient = new OkHttpClient();
|
||||||
|
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(urlString + "/" + deposition_id + "/actions/publish")
|
||||||
|
.addHeader("Authorization", "Bearer " + access_token)
|
||||||
|
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
try (Response response = httpClient.newCall(request).execute()) {
|
||||||
|
|
||||||
|
if (!response.isSuccessful())
|
||||||
|
throw new IOException("Unexpected code " + response + response.body().string());
|
||||||
|
|
||||||
|
return response.code();
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To create a new version of an already published deposition.
|
||||||
|
* It sets the deposition_id and the bucket to be used for the new version.
|
||||||
|
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is
|
||||||
|
* the last part of the url for the DOI Zenodo suggests to use to cite all versions:
|
||||||
|
* DOI: 10.xxx/zenodo.656930 concept_rec_id = 656930
|
||||||
|
* @return response code
|
||||||
|
* @throws IOException
|
||||||
|
* @throws MissingConceptDoiException
|
||||||
|
*/
|
||||||
|
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
||||||
|
setDepositionId(concept_rec_id);
|
||||||
|
String json = "{}";
|
||||||
|
|
||||||
|
OkHttpClient httpClient = new OkHttpClient();
|
||||||
|
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
||||||
|
.addHeader("Authorization", "Bearer " + access_token)
|
||||||
|
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
try (Response response = httpClient.newCall(request).execute()) {
|
||||||
|
|
||||||
|
if (!response.isSuccessful())
|
||||||
|
throw new IOException("Unexpected code " + response + response.body().string());
|
||||||
|
|
||||||
|
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
|
||||||
|
String latest_draft = zenodoModel.getLinks().getLatest_draft();
|
||||||
|
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
|
||||||
|
bucket = getBucket(latest_draft);
|
||||||
|
return response.code();
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
|
ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
|
||||||
|
|
||||||
|
for(ZenodoModel zm : zenodoModelList){
|
||||||
|
if (zm.getConceptrecid().equals(concept_rec_id)){
|
||||||
|
deposition_id = zm.getId();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getPrevDepositions() throws IOException {
|
||||||
|
OkHttpClient httpClient = new OkHttpClient();
|
||||||
|
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(urlString)
|
||||||
|
.addHeader("Content-Type", "application/json") // add request headers
|
||||||
|
.addHeader("Authorization", "Bearer " + access_token)
|
||||||
|
.get()
|
||||||
|
.build();
|
||||||
|
|
||||||
|
try (Response response = httpClient.newCall(request).execute()) {
|
||||||
|
|
||||||
|
if (!response.isSuccessful())
|
||||||
|
throw new IOException("Unexpected code " + response + response.body().string());
|
||||||
|
|
||||||
|
return response.body().string();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getBucket(String url) throws IOException {
|
||||||
|
OkHttpClient httpClient = new OkHttpClient();
|
||||||
|
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(url)
|
||||||
|
.addHeader("Content-Type", "application/json") // add request headers
|
||||||
|
.addHeader("Authorization", "Bearer " + access_token)
|
||||||
|
.get()
|
||||||
|
.build();
|
||||||
|
|
||||||
|
try (Response response = httpClient.newCall(request).execute()) {
|
||||||
|
|
||||||
|
if (!response.isSuccessful())
|
||||||
|
throw new IOException("Unexpected code " + response + response.body().string());
|
||||||
|
|
||||||
|
// Get response body
|
||||||
|
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
|
||||||
|
|
||||||
|
|
||||||
|
return zenodoModel.getLinks().getBucket();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
/**
 * A community entry of the deposition metadata, identified by a single identifier.
 * <p>
 * It is serializable because {@code Metadata}, which is {@link java.io.Serializable}, holds a list of communities.
 */
public class Community implements java.io.Serializable {

	private String identifier;

	/**
	 * @return the community identifier, or {@code null} if it was never set
	 */
	public String getIdentifier() {
		return identifier;
	}

	/**
	 * @param identifier the community identifier
	 */
	public void setIdentifier(String identifier) {
		this.identifier = identifier;
	}
}
|
|
@ -0,0 +1,47 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
/**
 * A creator entry of the deposition metadata: name, affiliation and ORCID.
 * <p>
 * It is serializable because {@code Metadata}, which is {@link java.io.Serializable}, holds a list of creators.
 */
public class Creator implements java.io.Serializable {

	private String affiliation;
	private String name;
	private String orcid;

	public String getAffiliation() {
		return affiliation;
	}

	public void setAffiliation(String affiliation) {
		this.affiliation = affiliation;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getOrcid() {
		return orcid;
	}

	public void setOrcid(String orcid) {
		this.orcid = orcid;
	}

	/**
	 * Builds a creator from the given values; any of them may be {@code null}.
	 *
	 * @param name the creator name
	 * @param affiliation the creator affiliation
	 * @param orcid the creator ORCID
	 * @return the new creator
	 */
	public static Creator newInstance(String name, String affiliation, String orcid) {
		Creator c = new Creator();
		// fields of a fresh instance are already null, so null arguments need no special casing
		c.name = name;
		c.affiliation = affiliation;
		c.orcid = orcid;
		return c;
	}
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import net.minidev.json.annotate.JsonIgnore;
|
||||||
|
|
||||||
|
/**
 * A file entry of a deposition: checksum, file name, size and id.
 */
public class File implements Serializable {

	private String checksum;
	private String filename;
	private long filesize;
	private String id;

	// The @JsonIgnore that belonged to the removed "links" field is gone: with the field commented out
	// it was being applied to getChecksum() instead.

	public String getChecksum() {
		return checksum;
	}

	public void setChecksum(String checksum) {
		this.checksum = checksum;
	}

	public String getFilename() {
		return filename;
	}

	public void setFilename(String filename) {
		this.filename = filename;
	}

	public long getFilesize() {
		return filesize;
	}

	public void setFilesize(long filesize) {
		this.filesize = filesize;
	}

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * A grant entry of the deposition metadata, identified by its id.
 */
public class Grant implements Serializable {

	private String id;

	/**
	 * Creates a grant carrying the given id.
	 *
	 * @param id the grant id
	 * @return the new grant
	 */
	public static Grant newInstance(String id) {
		final Grant grant = new Grant();
		grant.setId(id);
		return grant;
	}

	/** @return the grant id */
	public String getId() {
		return this.id;
	}

	/** @param id the grant id */
	public void setId(final String id) {
		this.id = id;
	}
}
|
|
@ -0,0 +1,92 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * The set of links attached to a deposition. Every link is kept as a plain string.
 */
public class Links implements Serializable {

	private String bucket;
	private String discard;
	private String edit;
	private String files;
	private String html;
	private String latest_draft;
	private String latest_draft_html;
	private String publish;
	private String self;

	/** @return the {@code bucket} link */
	public String getBucket() {
		return this.bucket;
	}

	/** @param bucket the {@code bucket} link */
	public void setBucket(final String bucket) {
		this.bucket = bucket;
	}

	/** @return the {@code discard} link */
	public String getDiscard() {
		return this.discard;
	}

	/** @param discard the {@code discard} link */
	public void setDiscard(final String discard) {
		this.discard = discard;
	}

	/** @return the {@code edit} link */
	public String getEdit() {
		return this.edit;
	}

	/** @param edit the {@code edit} link */
	public void setEdit(final String edit) {
		this.edit = edit;
	}

	/** @return the {@code files} link */
	public String getFiles() {
		return this.files;
	}

	/** @param files the {@code files} link */
	public void setFiles(final String files) {
		this.files = files;
	}

	/** @return the {@code html} link */
	public String getHtml() {
		return this.html;
	}

	/** @param html the {@code html} link */
	public void setHtml(final String html) {
		this.html = html;
	}

	/** @return the {@code latest_draft} link */
	public String getLatest_draft() {
		return this.latest_draft;
	}

	/** @param latest_draft the {@code latest_draft} link */
	public void setLatest_draft(final String latest_draft) {
		this.latest_draft = latest_draft;
	}

	/** @return the {@code latest_draft_html} link */
	public String getLatest_draft_html() {
		return this.latest_draft_html;
	}

	/** @param latest_draft_html the {@code latest_draft_html} link */
	public void setLatest_draft_html(final String latest_draft_html) {
		this.latest_draft_html = latest_draft_html;
	}

	/** @return the {@code publish} link */
	public String getPublish() {
		return this.publish;
	}

	/** @param publish the {@code publish} link */
	public void setPublish(final String publish) {
		this.publish = publish;
	}

	/** @return the {@code self} link */
	public String getSelf() {
		return this.self;
	}

	/** @param self the {@code self} link */
	public void setSelf(final String self) {
		this.self = self;
	}
}
|
|
@ -0,0 +1,153 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Metadata implements Serializable {
|
||||||
|
|
||||||
|
private String access_right;
|
||||||
|
private List<Community> communities;
|
||||||
|
private List<Creator> creators;
|
||||||
|
private String description;
|
||||||
|
private String doi;
|
||||||
|
private List<Grant> grants;
|
||||||
|
private List<String> keywords;
|
||||||
|
private String language;
|
||||||
|
private String license;
|
||||||
|
private PrereserveDoi prereserve_doi;
|
||||||
|
private String publication_date;
|
||||||
|
private List<String> references;
|
||||||
|
private List<RelatedIdentifier> related_identifiers;
|
||||||
|
private String title;
|
||||||
|
private String upload_type;
|
||||||
|
private String version;
|
||||||
|
|
||||||
|
public String getUpload_type() {
|
||||||
|
return upload_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUpload_type(String upload_type) {
|
||||||
|
this.upload_type = upload_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getVersion() {
|
||||||
|
return version;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setVersion(String version) {
|
||||||
|
this.version = version;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAccess_right() {
|
||||||
|
return access_right;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAccess_right(String access_right) {
|
||||||
|
this.access_right = access_right;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Community> getCommunities() {
|
||||||
|
return communities;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCommunities(List<Community> communities) {
|
||||||
|
this.communities = communities;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Creator> getCreators() {
|
||||||
|
return creators;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCreators(List<Creator> creators) {
|
||||||
|
this.creators = creators;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDescription() {
|
||||||
|
return description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDescription(String description) {
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDoi() {
|
||||||
|
return doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDoi(String doi) {
|
||||||
|
this.doi = doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Grant> getGrants() {
|
||||||
|
return grants;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setGrants(List<Grant> grants) {
|
||||||
|
this.grants = grants;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getKeywords() {
|
||||||
|
return keywords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setKeywords(List<String> keywords) {
|
||||||
|
this.keywords = keywords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLanguage() {
|
||||||
|
return language;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLanguage(String language) {
|
||||||
|
this.language = language;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLicense() {
|
||||||
|
return license;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLicense(String license) {
|
||||||
|
this.license = license;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PrereserveDoi getPrereserve_doi() {
|
||||||
|
return prereserve_doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
|
||||||
|
this.prereserve_doi = prereserve_doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPublication_date() {
|
||||||
|
return publication_date;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPublication_date(String publication_date) {
|
||||||
|
this.publication_date = publication_date;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getReferences() {
|
||||||
|
return references;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setReferences(List<String> references) {
|
||||||
|
this.references = references;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<RelatedIdentifier> getRelated_identifiers() {
|
||||||
|
return related_identifiers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
|
||||||
|
this.related_identifiers = related_identifiers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTitle() {
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTitle(String title) {
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * The pre-reserved DOI of a deposition together with its record id.
 */
public class PrereserveDoi implements Serializable {

	private String doi;
	private String recid;

	/** @return the DOI */
	public String getDoi() {
		return this.doi;
	}

	/** @param doi the DOI */
	public void setDoi(final String doi) {
		this.doi = doi;
	}

	/** @return the record id */
	public String getRecid() {
		return this.recid;
	}

	/** @param recid the record id */
	public void setRecid(final String recid) {
		this.recid = recid;
	}
}
|
|
@ -0,0 +1,43 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * A related identifier of the deposition metadata: identifier, relation, resource type and scheme.
 */
public class RelatedIdentifier implements Serializable {

	private String identifier;
	private String relation;
	private String resource_type;
	private String scheme;

	/** @return the identifier */
	public String getIdentifier() {
		return this.identifier;
	}

	/** @param identifier the identifier */
	public void setIdentifier(final String identifier) {
		this.identifier = identifier;
	}

	/** @return the relation */
	public String getRelation() {
		return this.relation;
	}

	/** @param relation the relation */
	public void setRelation(final String relation) {
		this.relation = relation;
	}

	/** @return the resource type */
	public String getResource_type() {
		return this.resource_type;
	}

	/** @param resource_type the resource type */
	public void setResource_type(final String resource_type) {
		this.resource_type = resource_type;
	}

	/** @return the scheme */
	public String getScheme() {
		return this.scheme;
	}

	/** @param scheme the scheme */
	public void setScheme(final String scheme) {
		this.scheme = scheme;
	}
}
|
|
@ -0,0 +1,118 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ZenodoModel implements Serializable {
|
||||||
|
|
||||||
|
private String conceptrecid;
|
||||||
|
private String created;
|
||||||
|
|
||||||
|
private List<File> files;
|
||||||
|
private String id;
|
||||||
|
private Links links;
|
||||||
|
private Metadata metadata;
|
||||||
|
private String modified;
|
||||||
|
private String owner;
|
||||||
|
private String record_id;
|
||||||
|
private String state;
|
||||||
|
private boolean submitted;
|
||||||
|
private String title;
|
||||||
|
|
||||||
|
public String getConceptrecid() {
|
||||||
|
return conceptrecid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setConceptrecid(String conceptrecid) {
|
||||||
|
this.conceptrecid = conceptrecid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCreated() {
|
||||||
|
return created;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCreated(String created) {
|
||||||
|
this.created = created;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<File> getFiles() {
|
||||||
|
return files;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFiles(List<File> files) {
|
||||||
|
this.files = files;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Links getLinks() {
|
||||||
|
return links;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLinks(Links links) {
|
||||||
|
this.links = links;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Metadata getMetadata() {
|
||||||
|
return metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMetadata(Metadata metadata) {
|
||||||
|
this.metadata = metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getModified() {
|
||||||
|
return modified;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setModified(String modified) {
|
||||||
|
this.modified = modified;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOwner() {
|
||||||
|
return owner;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOwner(String owner) {
|
||||||
|
this.owner = owner;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRecord_id() {
|
||||||
|
return record_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRecord_id(String record_id) {
|
||||||
|
this.record_id = record_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getState() {
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setState(String state) {
|
||||||
|
this.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSubmitted() {
|
||||||
|
return submitted;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubmitted(boolean submitted) {
|
||||||
|
this.submitted = submitted;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTitle() {
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTitle(String title) {
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,6 @@
|
||||||
|
package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
 * A list of {@link ZenodoModel}. Declaring it as a concrete, non-generic class gives JSON deserialization a
 * plain {@code Class} token for a list of depositions.
 */
public class ZenodoModelList extends ArrayList<ZenodoModel> {
|
||||||
|
}
|
|
@ -0,0 +1,85 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Disabled;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
@Disabled
|
||||||
|
public class ZenodoAPIClientTest {
|
||||||
|
|
||||||
|
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
||||||
|
private final String ACCESS_TOKEN = "";
|
||||||
|
|
||||||
|
private final String CONCEPT_REC_ID = "657113";
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNewDeposition() throws IOException {
|
||||||
|
|
||||||
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
|
ACCESS_TOKEN);
|
||||||
|
Assertions.assertEquals(201, client.newDeposition());
|
||||||
|
|
||||||
|
File file = new File(getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
|
||||||
|
.getPath());
|
||||||
|
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
|
||||||
|
|
||||||
|
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.sendMretadata(metadata));
|
||||||
|
|
||||||
|
Assertions.assertEquals(202, client.publish());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
|
ACCESS_TOKEN);
|
||||||
|
|
||||||
|
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||||
|
|
||||||
|
File file = new File(getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/common/api/newVersion")
|
||||||
|
.getPath());
|
||||||
|
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
|
||||||
|
|
||||||
|
Assertions.assertEquals(202, client.publish());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
|
ACCESS_TOKEN);
|
||||||
|
|
||||||
|
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||||
|
|
||||||
|
File file = new File(getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
|
||||||
|
.getPath());
|
||||||
|
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
|
||||||
|
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
|
||||||
|
|
||||||
|
Assertions.assertEquals(202, client.publish());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
{"metadata":{"access_right":"open","communities":[{"identifier":"openaire-research-graph"}],"creators":[{"affiliation":"ISTI - CNR","name":"Bardi, Alessia","orcid":"0000-0002-1112-1292"},{"affiliation":"eifl", "name":"Kuchma, Iryna"},{"affiliation":"BIH", "name":"Brobov, Evgeny"},{"affiliation":"GIDIF RBM", "name":"Truccolo, Ivana"},{"affiliation":"unesp", "name":"Monteiro, Elizabete"},{"affiliation":"und", "name":"Casalegno, Carlotta"},{"affiliation":"CARL ABRC", "name":"Clary, Erin"},{"affiliation":"The University of Edimburgh", "name":"Romanowski, Andrew"},{"affiliation":"ISTI - CNR", "name":"Pavone, Gina"},{"affiliation":"ISTI - CNR", "name":"Artini, Michele"},{"affiliation":"ISTI - CNR","name":"Atzori, Claudio","orcid":"0000-0001-9613-6639"},{"affiliation":"University of Bielefeld","name":"Bäcker, Amelie","orcid":"0000-0001-6015-2063"},{"affiliation":"ISTI - CNR","name":"Baglioni, Miriam","orcid":"0000-0002-2273-9004"},{"affiliation":"University of Bielefeld","name":"Czerniak, Andreas","orcid":"0000-0003-3883-4169"},{"affiliation":"ISTI - CNR","name":"De Bonis, Michele"},{"affiliation":"Athena Research and Innovation Centre","name":"Dimitropoulos, Harry"},{"affiliation":"Athena Research and Innovation Centre","name":"Foufoulas, Ioannis"},{"affiliation":"University of Warsaw","name":"Horst, Marek"},{"affiliation":"Athena Research and Innovation Centre","name":"Iatropoulou, Katerina"},{"affiliation":"University of Warsaw","name":"Jacewicz, Przemyslaw"},{"affiliation":"Athena Research and Innovation Centre","name":"Kokogiannaki, Argiro", "orcid":"0000-0002-3880-0244"},{"affiliation":"ISTI - CNR","name":"La Bruzzo, Sandro","orcid":"0000-0003-2855-1245"},{"affiliation":"ISTI - CNR","name":"Lazzeri, Emma"},{"affiliation":"University of Bielefeld","name":"Löhden, Aenne"},{"affiliation":"ISTI - CNR","name":"Manghi, Paolo","orcid":"0000-0001-7291-3210"},{"affiliation":"ISTI - CNR","name":"Mannocci, Andrea","orcid":"0000-0002-5193-7851"},{"affiliation":"Athena Research 
and Innovation Center","name":"Manola, Natalia"},{"affiliation":"ISTI - CNR","name":"Ottonello, Enrico"},{"affiliation":"University of Bielefeld","name":"Shirrwagen, Jochen"}],"description":"\\u003cp\\u003eThis dump provides access to the metadata records of publications, research data, software and projects that may be relevant to the Corona Virus Disease (COVID-19) fight. The dump contains records of the OpenAIRE COVID-19 Gateway (https://covid-19.openaire.eu/), identified via full-text mining and inference techniques applied to the OpenAIRE Research Graph (https://explore.openaire.eu/). The Graph is one of the largest Open Access collections of metadata records and links between publications, datasets, software, projects, funders, and organizations, aggregating 12,000+ scientific data sources world-wide, among which the Covid-19 data sources Zenodo COVID-19 Community, WHO (World Health Organization), BIP! FInder for COVID-19, Protein Data Bank, Dimensions, scienceOpen, and RSNA. \\u003cp\\u003eThe dump consists of a gzip file containing one json per line. Each json is compliant to the schema available at https://doi.org/10.5281/zenodo.3974226\\u003c/p\\u003e ","title":"OpenAIRE Covid-19 publications, datasets, software and projects metadata.","upload_type":"dataset","version":"1.0"}}
|
|
@ -0,0 +1 @@
|
||||||
|
This is a test for a new deposition
|
|
@ -0,0 +1 @@
|
||||||
|
This is a test for a new version of an old deposition
|
|
@ -0,0 +1,2 @@
|
||||||
|
This is a test for a new version of an old deposition. This should replace the other new version. I expect to have only two
|
||||||
|
files in the deposition
|
|
@ -79,6 +79,15 @@ public class ModelSupport {
|
||||||
entityIdPrefix.put("result", "50");
|
entityIdPrefix.put("result", "50");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Maps a two-digit identifier prefix to the name of the entity type it denotes.
 * NOTE(review): this looks like the inverse of {@code entityIdPrefix} (which maps "result" to "50") - confirm
 * that the two maps are kept in sync.
 */
public static final Map<String, String> idPrefixEntity = Maps.newHashMap();

static {
	// prefix -> entity type name
	idPrefixEntity.put("10", "datasource");
	idPrefixEntity.put("20", "organization");
	idPrefixEntity.put("40", "project");
	idPrefixEntity.put("50", "result");
}
|
||||||
|
|
||||||
public static final Map<String, RelationInverse> relationInverseMap = Maps.newHashMap();
|
public static final Map<String, RelationInverse> relationInverseMap = Maps.newHashMap();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Used to refer to the Article Processing Charge (APC) information. Not dumped in this release. It contains two
 * parameters:
 * <ul>
 * <li>currency of type String to store the currency of the APC</li>
 * <li>amount of type String to store the charged amount</li>
 * </ul>
 */
public class APC implements Serializable {

	/** Currency of the APC. */
	private String currency;

	/** Charged amount, kept as a String. */
	private String amount;

	/**
	 * @return the currency of the APC, or {@code null} if it was never set
	 */
	public String getCurrency() {
		return this.currency;
	}

	/**
	 * @param currency the currency of the APC
	 */
	public void setCurrency(final String currency) {
		this.currency = currency;
	}

	/**
	 * @return the charged amount, or {@code null} if it was never set
	 */
	public String getAmount() {
		return this.amount;
	}

	/**
	 * @param amount the charged amount
	 */
	public void setAmount(final String amount) {
		this.amount = amount;
	}
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AccessRight. Used to represent the result access rights. It extends the eu.dnet.lib.dhp.schema.dump.oaf.Qualifier
|
||||||
|
* element with a parameter scheme of type String to store the scheme. Values for this element are found against the
|
||||||
|
* COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get
|
||||||
|
* the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the
|
||||||
|
* COAR code will be used to set the label parameter. The scheme value will always be the one referring to the COAR
|
||||||
|
* access right scheme
|
||||||
|
*/
|
||||||
|
public class AccessRight extends Qualifier {
|
||||||
|
|
||||||
|
private String scheme;
|
||||||
|
|
||||||
|
public String getScheme() {
|
||||||
|
return scheme;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setScheme(String scheme) {
|
||||||
|
this.scheme = scheme;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static AccessRight newInstance(String code, String label, String scheme) {
|
||||||
|
AccessRight ar = new AccessRight();
|
||||||
|
ar.setCode(code);
|
||||||
|
ar.setLabel(label);
|
||||||
|
ar.setScheme(scheme);
|
||||||
|
return ar;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,73 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used to represent the generic author of the result. It has six parameters: - name of type String to store the given
|
||||||
|
* name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of
|
||||||
|
* type String to store the family name of the author. The value for this parameter corresponds to
|
||||||
|
* eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for
|
||||||
|
* this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank on
|
||||||
|
* the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author
|
||||||
|
* rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the
|
||||||
|
* moment only ORCID identifiers will be dumped. - The id element is instantiated by using the following values in the
|
||||||
|
* eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is
|
||||||
|
* instantiated only if the dataInfo is set for the pid in the result to be dumped. The provenance element is
|
||||||
|
* instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: *
|
||||||
|
* dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust
|
||||||
|
*/
|
||||||
|
public class Author implements Serializable {
|
||||||
|
|
||||||
|
private String fullname;
|
||||||
|
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
private String surname;
|
||||||
|
|
||||||
|
private Integer rank;
|
||||||
|
|
||||||
|
private Pid pid;
|
||||||
|
|
||||||
|
public String getFullname() {
|
||||||
|
return fullname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFullname(String fullname) {
|
||||||
|
this.fullname = fullname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setName(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSurname() {
|
||||||
|
return surname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSurname(String surname) {
|
||||||
|
this.surname = surname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getRank() {
|
||||||
|
return rank;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRank(Integer rank) {
|
||||||
|
this.rank = rank;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pid getPid() {
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPid(Pid pid) {
|
||||||
|
this.pid = pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,136 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
 * To store information about the conference or journal where the result has been presented or published. It contains
 * eleven parameters, each of type String and each corresponding to the parameter with the same name of
 * eu.dnetlib.dhp.schema.oaf.Journal:
 * <ul>
 * <li>name to store the name of the journal or conference</li>
 * <li>issnPrinted to store the journal printed issn</li>
 * <li>issnOnline to store the journal online issn</li>
 * <li>issnLinking to store the journal linking issn</li>
 * <li>ep to store the end page</li>
 * <li>iss to store the journal issue</li>
 * <li>sp to store the start page</li>
 * <li>vol to store the volume</li>
 * <li>edition to store the edition of the journal or conference proceeding</li>
 * <li>conferenceplace to store the place of the conference</li>
 * <li>conferencedate to store the date of the conference</li>
 * </ul>
 */
public class Container implements Serializable {

	/** Name of the journal or conference. */
	private String name;

	/** Printed issn of the journal. */
	private String issnPrinted;

	/** Online issn of the journal. */
	private String issnOnline;

	/** Linking issn of the journal. */
	private String issnLinking;

	/** End page. */
	private String ep;

	/** Journal issue. */
	private String iss;

	/** Start page. */
	private String sp;

	/** Volume. */
	private String vol;

	/** Edition of the journal or conference proceeding. */
	private String edition;

	/** Place of the conference. */
	private String conferenceplace;

	/** Date of the conference. */
	private String conferencedate;

	/**
	 * @return the name of the journal or conference
	 */
	public String getName() {
		return this.name;
	}

	/**
	 * @param name the name of the journal or conference
	 */
	public void setName(final String name) {
		this.name = name;
	}

	/**
	 * @return the printed issn
	 */
	public String getIssnPrinted() {
		return this.issnPrinted;
	}

	/**
	 * @param issnPrinted the printed issn
	 */
	public void setIssnPrinted(final String issnPrinted) {
		this.issnPrinted = issnPrinted;
	}

	/**
	 * @return the online issn
	 */
	public String getIssnOnline() {
		return this.issnOnline;
	}

	/**
	 * @param issnOnline the online issn
	 */
	public void setIssnOnline(final String issnOnline) {
		this.issnOnline = issnOnline;
	}

	/**
	 * @return the linking issn
	 */
	public String getIssnLinking() {
		return this.issnLinking;
	}

	/**
	 * @param issnLinking the linking issn
	 */
	public void setIssnLinking(final String issnLinking) {
		this.issnLinking = issnLinking;
	}

	/**
	 * @return the end page
	 */
	public String getEp() {
		return this.ep;
	}

	/**
	 * @param ep the end page
	 */
	public void setEp(final String ep) {
		this.ep = ep;
	}

	/**
	 * @return the journal issue
	 */
	public String getIss() {
		return this.iss;
	}

	/**
	 * @param iss the journal issue
	 */
	public void setIss(final String iss) {
		this.iss = iss;
	}

	/**
	 * @return the start page
	 */
	public String getSp() {
		return this.sp;
	}

	/**
	 * @param sp the start page
	 */
	public void setSp(final String sp) {
		this.sp = sp;
	}

	/**
	 * @return the volume
	 */
	public String getVol() {
		return this.vol;
	}

	/**
	 * @param vol the volume
	 */
	public void setVol(final String vol) {
		this.vol = vol;
	}

	/**
	 * @return the edition of the journal or conference proceeding
	 */
	public String getEdition() {
		return this.edition;
	}

	/**
	 * @param edition the edition of the journal or conference proceeding
	 */
	public void setEdition(final String edition) {
		this.edition = edition;
	}

	/**
	 * @return the place of the conference
	 */
	public String getConferenceplace() {
		return this.conferenceplace;
	}

	/**
	 * @param conferenceplace the place of the conference
	 */
	public void setConferenceplace(final String conferenceplace) {
		this.conferenceplace = conferenceplace;
	}

	/**
	 * @return the date of the conference
	 */
	public String getConferencedate() {
		return this.conferencedate;
	}

	/**
	 * @param conferencedate the date of the conference
	 */
	public void setConferencedate(final String conferencedate) {
		this.conferencedate = conferencedate;
	}

}
|
|
@ -0,0 +1,38 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * To represent the information described by a scheme and a value in that scheme (i.e. pid). It has two parameters:
 * <ul>
 * <li>scheme of type String to store the scheme</li>
 * <li>value of type String to store the value in that scheme</li>
 * </ul>
 */
public class ControlledField implements Serializable {

	/** Scheme the value belongs to. */
	private String scheme;

	/** Value expressed in the scheme. */
	private String value;

	/**
	 * @return the scheme
	 */
	public String getScheme() {
		return this.scheme;
	}

	/**
	 * @param scheme the scheme
	 */
	public void setScheme(final String scheme) {
		this.scheme = scheme;
	}

	/**
	 * @return the value in the scheme
	 */
	public String getValue() {
		return this.value;
	}

	/**
	 * @param value the value in the scheme
	 */
	public void setValue(final String value) {
		this.value = value;
	}

	/**
	 * Builds a ControlledField with both parameters set.
	 *
	 * @param scheme the scheme
	 * @param value the value in that scheme
	 * @return a new ControlledField instance
	 */
	public static ControlledField newInstance(final String scheme, final String value) {
		final ControlledField field = new ControlledField();
		field.scheme = scheme;
		field.value = value;
		return field;
	}
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents the country associated to this result. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a
|
||||||
|
* provenance parameter of type eu.dnetlib.dhp.schema.dumo.oaf.Provenance. The country in not mapped if its value in the
|
||||||
|
* result reprensented in the internal format is Unknown. The value for this element correspond to: - code corresponds
|
||||||
|
* to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of
|
||||||
|
* eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be
|
||||||
|
* dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with
|
||||||
|
* datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust
|
||||||
|
*/
|
||||||
|
public class Country extends Qualifier {
|
||||||
|
|
||||||
|
private Provenance provenance;
|
||||||
|
|
||||||
|
public Provenance getProvenance() {
|
||||||
|
return provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProvenance(Provenance provenance) {
|
||||||
|
this.provenance = provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Country newInstance(String code, String label, Provenance provenance) {
|
||||||
|
Country c = new Country();
|
||||||
|
c.setProvenance(provenance);
|
||||||
|
c.setCode(code);
|
||||||
|
c.setLabel(label);
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Country newInstance(String code, String label, String provenance, String trust) {
|
||||||
|
return newInstance(code, label, Provenance.newInstance(provenance, trust));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Holds three String values describing a funder: its short name, its name and its jurisdiction.
 */
public class Funder implements Serializable {

	/** Short name of the funder. */
	private String shortName;

	/** Name of the funder. */
	private String name;

	/** Jurisdiction of the funder. */
	private String jurisdiction;

	/**
	 * @return the jurisdiction, or {@code null} if it was never set
	 */
	public String getJurisdiction() {
		return this.jurisdiction;
	}

	/**
	 * @param jurisdiction the jurisdiction of the funder
	 */
	public void setJurisdiction(final String jurisdiction) {
		this.jurisdiction = jurisdiction;
	}

	/**
	 * @return the short name, or {@code null} if it was never set
	 */
	public String getShortName() {
		return this.shortName;
	}

	/**
	 * @param shortName the short name of the funder
	 */
	public void setShortName(final String shortName) {
		this.shortName = shortName;
	}

	/**
	 * @return the name, or {@code null} if it was never set
	 */
	public String getName() {
		return this.name;
	}

	/**
	 * @param name the name of the funder
	 */
	public void setName(final String name) {
		this.name = name;
	}
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents the geolocation information. It has three parameters: - point of type String to store the point
|
||||||
|
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box ot type String to store the box
|
||||||
|
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place
|
||||||
|
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place
|
||||||
|
*/
|
||||||
|
public class GeoLocation implements Serializable {
|
||||||
|
|
||||||
|
private String point;
|
||||||
|
|
||||||
|
private String box;
|
||||||
|
|
||||||
|
private String place;
|
||||||
|
|
||||||
|
public String getPoint() {
|
||||||
|
return point;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPoint(String point) {
|
||||||
|
this.point = point;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getBox() {
|
||||||
|
return box;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBox(String box) {
|
||||||
|
this.box = box;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPlace() {
|
||||||
|
return place;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPlace(String place) {
|
||||||
|
this.place = place;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonIgnore
|
||||||
|
public boolean isBlank() {
|
||||||
|
return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,107 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published
|
||||||
|
* versions are two manifestations of the same research result. It has the following parameters: - license of type
|
||||||
|
* String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be
|
||||||
|
* dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. -
|
||||||
|
* type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
|
||||||
|
* (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - hostedby of
|
||||||
|
* type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance can be
|
||||||
|
* viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and - key corresponds
|
||||||
|
* to hostedby.key - value corresponds to hostedby.value - url of type List<String> list of locations where the instance
|
||||||
|
* is accessible. It corresponds to url of the instance to be dumped - collectedfrom of type
|
||||||
|
* eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been
|
||||||
|
* collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to
|
||||||
|
* collectedfrom.key - value corresponds to collectedfrom.value - publicationdate of type String to store the
|
||||||
|
* publication date of the instance ;// dateofacceptance; - refereed of type String to store information abour tthe
|
||||||
|
* review status of the instance. Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed'. It corresponds to
|
||||||
|
* refereed.classname of the instance to be dumped
|
||||||
|
*/
|
||||||
|
public class Instance implements Serializable {
|
||||||
|
|
||||||
|
private String license;
|
||||||
|
|
||||||
|
private AccessRight accessright;
|
||||||
|
|
||||||
|
private String type;
|
||||||
|
|
||||||
|
private KeyValue hostedby;
|
||||||
|
|
||||||
|
private List<String> url;
|
||||||
|
|
||||||
|
private KeyValue collectedfrom;
|
||||||
|
|
||||||
|
private String publicationdate;// dateofacceptance;
|
||||||
|
|
||||||
|
private String refereed; // peer-review status
|
||||||
|
|
||||||
|
public String getLicense() {
|
||||||
|
return license;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLicense(String license) {
|
||||||
|
this.license = license;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AccessRight getAccessright() {
|
||||||
|
return accessright;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAccessright(AccessRight accessright) {
|
||||||
|
this.accessright = accessright;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getType() {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setType(String type) {
|
||||||
|
this.type = type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public KeyValue getHostedby() {
|
||||||
|
return hostedby;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHostedby(KeyValue hostedby) {
|
||||||
|
this.hostedby = hostedby;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getUrl() {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUrl(List<String> url) {
|
||||||
|
this.url = url;
|
||||||
|
}
|
||||||
|
|
||||||
|
public KeyValue getCollectedfrom() {
|
||||||
|
return collectedfrom;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCollectedfrom(KeyValue collectedfrom) {
|
||||||
|
this.collectedfrom = collectedfrom;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPublicationdate() {
|
||||||
|
return publicationdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPublicationdate(String publicationdate) {
|
||||||
|
this.publicationdate = publicationdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRefereed() {
|
||||||
|
return refereed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRefereed(String refereed) {
|
||||||
|
this.refereed = refereed;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To represent the information described by a key and a value. It has two parameters: - key to store the key (generally
|
||||||
|
* the OpenAIRE id for some entity) - value to store the value (generally the OpenAIRE name for the key)
|
||||||
|
*/
|
||||||
|
public class KeyValue implements Serializable {
|
||||||
|
|
||||||
|
private String key;
|
||||||
|
|
||||||
|
private String value;
|
||||||
|
|
||||||
|
public String getKey() {
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setKey(String key) {
|
||||||
|
this.key = key;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setValue(String value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static KeyValue newInstance(String key, String value) {
|
||||||
|
KeyValue inst = new KeyValue();
|
||||||
|
inst.key = key;
|
||||||
|
inst.value = value;
|
||||||
|
return inst;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonIgnore
|
||||||
|
public boolean isBlank() {
|
||||||
|
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To represent the generic persistent identifier. It has two parameters: - id of type
|
||||||
|
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the scheme and value of the Persistent Identifier. -
|
||||||
|
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information
|
||||||
|
*/
|
||||||
|
public class Pid implements Serializable {
|
||||||
|
private ControlledField id;
|
||||||
|
private Provenance provenance;
|
||||||
|
|
||||||
|
public ControlledField getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(ControlledField pid) {
|
||||||
|
this.id = pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Provenance getProvenance() {
|
||||||
|
return provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProvenance(Provenance provenance) {
|
||||||
|
this.provenance = provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Pid newInstance(ControlledField pid, Provenance provenance) {
|
||||||
|
Pid p = new Pid();
|
||||||
|
p.id = pid;
|
||||||
|
p.provenance = provenance;
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Pid newInstance(ControlledField pid) {
|
||||||
|
Pid p = new Pid();
|
||||||
|
p.id = pid;
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Holds four String values describing a project: id, code, acronym and title. The fields are protected, so they are
 * directly accessible to subclasses.
 */
public class Project implements Serializable {

	/** OpenAIRE id. */
	protected String id;

	/** Code of the project. */
	protected String code;

	/** Acronym of the project. */
	protected String acronym;

	/** Title of the project. */
	protected String title;

	/**
	 * @return the OpenAIRE id
	 */
	public String getId() {
		return this.id;
	}

	/**
	 * @param id the OpenAIRE id
	 */
	public void setId(final String id) {
		this.id = id;
	}

	/**
	 * @return the code of the project
	 */
	public String getCode() {
		return this.code;
	}

	/**
	 * @param code the code of the project
	 */
	public void setCode(final String code) {
		this.code = code;
	}

	/**
	 * @return the acronym of the project
	 */
	public String getAcronym() {
		return this.acronym;
	}

	/**
	 * @param acronym the acronym of the project
	 */
	public void setAcronym(final String acronym) {
		this.acronym = acronym;
	}

	/**
	 * @return the title of the project
	 */
	public String getTitle() {
		return this.title;
	}

	/**
	 * @param title the title of the project
	 */
	public void setTitle(final String title) {
		this.title = title;
	}
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Indicates the process that produced (or provided) the information, and the trust associated to the information. It
 * has two parameters:
 * <ul>
 * <li>provenance of type String to store the provenance of the information</li>
 * <li>trust of type String to store the trust associated to the information</li>
 * </ul>
 */
public class Provenance implements Serializable {

	/** Provenance of the information. */
	private String provenance;

	/** Trust associated to the information. */
	private String trust;

	/**
	 * @return the provenance of the information
	 */
	public String getProvenance() {
		return this.provenance;
	}

	/**
	 * @param provenance the provenance of the information
	 */
	public void setProvenance(final String provenance) {
		this.provenance = provenance;
	}

	/**
	 * @return the trust associated to the information
	 */
	public String getTrust() {
		return this.trust;
	}

	/**
	 * @param trust the trust associated to the information
	 */
	public void setTrust(final String trust) {
		this.trust = trust;
	}

	/**
	 * Builds a Provenance with both parameters set.
	 *
	 * @param provenance the provenance of the information
	 * @param trust the trust associated to the information
	 * @return a new Provenance instance
	 */
	public static Provenance newInstance(final String provenance, final String trust) {
		final Provenance instance = new Provenance();
		instance.setProvenance(provenance);
		instance.setTrust(trust);
		return instance;
	}

	/**
	 * @return the provenance immediately followed by the trust, with no separator; a null parameter is rendered as
	 *         the text "null"
	 */
	@Override
	public String toString() {
		return provenance + trust;
	}
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
|
|
||||||
|
/**
 * To represent the information described by a code and a value. It has two parameters:
 * <ul>
 * <li>code to store the code (generally the classid of the eu.dnetlib.dhp.schema.oaf.Qualifier element)</li>
 * <li>label to store the label (generally the classname of the eu.dnetlib.dhp.schema.oaf.Qualifier element)</li>
 * </ul>
 */
public class Qualifier implements Serializable {

	/** The classid in the Qualifier. */
	private String code;

	/** The classname in the Qualifier. */
	private String label;

	/**
	 * @return the code
	 */
	public String getCode() {
		return this.code;
	}

	/**
	 * @param code the code
	 */
	public void setCode(final String code) {
		this.code = code;
	}

	/**
	 * @return the label
	 */
	public String getLabel() {
		return this.label;
	}

	/**
	 * @param label the label
	 */
	public void setLabel(final String label) {
		this.label = label;
	}

	/**
	 * Builds a Qualifier with both parameters set.
	 *
	 * @param code the code
	 * @param value the text stored as the label
	 * @return a new Qualifier instance
	 */
	public static Qualifier newInstance(final String code, final String value) {
		final Qualifier instance = new Qualifier();
		instance.code = code;
		instance.label = value;
		return instance;
	}
}
|
|
@ -0,0 +1,391 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To represent the dumped result. It will be extended in the dump for Research Communities - Research
|
||||||
|
* Initiative/Infrastructures. It has the following parameters: - author of type
|
||||||
|
* List<eu.dnetlib.dhp.schema.dump.oaf.Author> to describe the authors of a result. For each author in the result
|
||||||
|
* represented in the internal model one author in the external model is produced. - type of type String to represent
|
||||||
|
* the category of the result. Possible values are publication, dataset, software, other. It corresponds to
|
||||||
|
* resulttype.classname of the dumped result - language of type eu.dnetlib.dhp.schema.dump.oaf.Qualifier to store
|
||||||
|
* information about the language of the result. It is dumped as - code corresponds to language.classid - value
|
||||||
|
* corresponds to language.classname - country of type List<eu.dnetlib.dhp.schema.dump.oaf.Country> to store the country
|
||||||
|
* list to which the result is associated. For each country in the result represented in the internal model one country
|
||||||
|
* in the external model is produced - subjects of type List<eu.dnetlib.dhp.schema.dump.oaf.Subject> to store the subjects for
|
||||||
|
* the result. For each subject in the result represented in the internal model one subject in the external model is
|
||||||
|
* produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first
|
||||||
|
* title in the result to be dumped having classid equals to "main title" - subtitle of type String to store the subtitle
|
||||||
|
* of the result. It corresponds to the value of the first title in the result to be dumped having classid equals to
|
||||||
|
* "subtitle" - description of type List<String> to store the description of the result. It corresponds to the list of
|
||||||
|
* description.value in the result represented in the internal model - publicationdate of type String to store the
|
||||||
|
* publication date. It corresponds to dateofacceptance.value in the result represented in the internal model -
|
||||||
|
* publisher of type String to store information about the publisher. It corresponds to publisher.value of the result
|
||||||
|
* represented in the internal model - embargoenddate of type String to store the embargo end date. It corresponds to
|
||||||
|
* embargoenddate.value of the result represented in the internal model - source of type List<String> See definition of
|
||||||
|
* Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal
|
||||||
|
* model - format of type List<String> It corresponds to the list of format.value in the result represented in the
|
||||||
|
* internal model - contributor of type List<String> to represent contributors for this result. It corresponds to the
|
||||||
|
* list of contributor.value in the result represented in the internal model - coverage of type List<String>. It corresponds
|
||||||
|
* to the list of coverage.value in the result represented in the internal model - bestaccessright of type
|
||||||
|
* eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store information about the most open access right associated to the
|
||||||
|
* manifestations of this research results. It corresponds to the same parameter in the result represented in the
|
||||||
|
* internal model - instance of type List<eu.dnetlib.dhp.schema.dump.oaf.Instance> to store all the instances associated
|
||||||
|
* to the result. It corresponds to the same parameter in the result represented in the internal model - container of
|
||||||
|
* type eu.dnetlib.dhp.schema.dump.oaf.Container (only for result of type publication). It corresponds to the parameter
|
||||||
|
* journal of the result represented in the internal model - documentationUrl of type List<String> (only for results of
|
||||||
|
* type software) to store the URLs to the software documentation. It corresponds to the list of documentationUrl.value
|
||||||
|
* of the result represented in the internal model - codeRepositoryUrl of type String (only for results of type
|
||||||
|
* software) to store the URL to the repository with the source code. It corresponds to codeRepositoryUrl.value of the
|
||||||
|
* result represented in the internal model - programmingLanguage of type String (only for results of type software) to
|
||||||
|
* store the programming language. It corresponds to programmingLanguage.classid of the result represented in the
|
||||||
|
* internal model - contactperson of type List<String> (only for results of type other) to store the contact person for
|
||||||
|
* this result. It corresponds to the list of contactperson.value of the result represented in the internal model -
|
||||||
|
* contactgroup of type List<String> (only for results of type other) to store the information for the contact group. It
|
||||||
|
* corresponds to the list of contactgroup.value of the result represented in the internal model - tool of type
|
||||||
|
* List<String> (only for results of type other) to store information about tool useful for the interpretation and/or
|
||||||
|
* re-use of the research product. It corresponds to the list of tool.value in the result represented in the internal
|
||||||
|
* model - size of type String (only for results of type dataset) to store the size of the dataset. It corresponds to
|
||||||
|
* size.value in the result represented in the internal model - version of type String (only for results of type
|
||||||
|
* dataset) to store the version. It corresponds to version.value of the result represented in the internal model -
|
||||||
|
* geolocation of type List<eu.dnetlib.dhp.schema.dump.oaf.GeoLocation> (only for results of type dataset) to store
|
||||||
|
* geolocation information. For each geolocation element in the result represented in the internal model a GeoLocation
|
||||||
|
* in the external model is produced - id of type String to store the OpenAIRE id of the result. It corresponds to the
|
||||||
|
* id of the result represented in the internal model - originalId of type List<String> to store the original ids of the
|
||||||
|
* result. It corresponds to the originalId of the result represented in the internal model - pid of type
|
||||||
|
* List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the persistent identifiers for the result. For each pid
|
||||||
|
* in the results represented in the internal model one pid in the external model is produced. The value correspondence
|
||||||
|
* is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model - value corresponds
|
||||||
|
* to the pid.value of the result represented in the internal model - dateofcollection of type String to store
|
||||||
|
* information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result
|
||||||
|
* represented in the internal model - lastupdatetimestamp of type Long to store the timestamp of the last update of
|
||||||
|
* the record. It corresponds to lastupdatetimestamp of the record represented in the internal model
|
||||||
|
*/
|
||||||
|
public class Result implements Serializable {
|
||||||
|
|
||||||
|
private List<Author> author;
|
||||||
|
|
||||||
|
// resulttype allows subclassing results into publications | datasets | software
|
||||||
|
private String type; // resulttype
|
||||||
|
|
||||||
|
// common fields
|
||||||
|
private Qualifier language;
|
||||||
|
|
||||||
|
private List<Country> country;
|
||||||
|
|
||||||
|
private List<Subject> subjects;
|
||||||
|
|
||||||
|
private String maintitle;
|
||||||
|
|
||||||
|
private String subtitle;
|
||||||
|
|
||||||
|
private List<String> description;
|
||||||
|
|
||||||
|
private String publicationdate; // dateofacceptance;
|
||||||
|
|
||||||
|
private String publisher;
|
||||||
|
|
||||||
|
private String embargoenddate;
|
||||||
|
|
||||||
|
private List<String> source;
|
||||||
|
|
||||||
|
private List<String> format;
|
||||||
|
|
||||||
|
private List<String> contributor;
|
||||||
|
|
||||||
|
private List<String> coverage;
|
||||||
|
|
||||||
|
private AccessRight bestaccessright;
|
||||||
|
|
||||||
|
private List<Instance> instance;
|
||||||
|
|
||||||
|
private Container container;// Journal
|
||||||
|
|
||||||
|
private List<String> documentationUrl; // software
|
||||||
|
|
||||||
|
private String codeRepositoryUrl; // software
|
||||||
|
|
||||||
|
private String programmingLanguage; // software
|
||||||
|
|
||||||
|
private List<String> contactperson; // orp
|
||||||
|
|
||||||
|
private List<String> contactgroup; // orp
|
||||||
|
|
||||||
|
private List<String> tool; // orp
|
||||||
|
|
||||||
|
private String size; // dataset
|
||||||
|
|
||||||
|
private String version; // dataset
|
||||||
|
|
||||||
|
private List<GeoLocation> geolocation; // dataset
|
||||||
|
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
private List<String> originalId;
|
||||||
|
|
||||||
|
private List<ControlledField> pid;
|
||||||
|
|
||||||
|
private String dateofcollection;
|
||||||
|
|
||||||
|
private Long lastupdatetimestamp;
|
||||||
|
|
||||||
|
public Long getLastupdatetimestamp() {
|
||||||
|
return lastupdatetimestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLastupdatetimestamp(Long lastupdatetimestamp) {
|
||||||
|
this.lastupdatetimestamp = lastupdatetimestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getOriginalId() {
|
||||||
|
return originalId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOriginalId(List<String> originalId) {
|
||||||
|
this.originalId = originalId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<ControlledField> getPid() {
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPid(List<ControlledField> pid) {
|
||||||
|
this.pid = pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDateofcollection() {
|
||||||
|
return dateofcollection;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDateofcollection(String dateofcollection) {
|
||||||
|
this.dateofcollection = dateofcollection;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Author> getAuthor() {
|
||||||
|
return author;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getType() {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setType(String type) {
|
||||||
|
this.type = type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Container getContainer() {
|
||||||
|
return container;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContainer(Container container) {
|
||||||
|
this.container = container;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAuthor(List<Author> author) {
|
||||||
|
this.author = author;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier getLanguage() {
|
||||||
|
return language;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLanguage(Qualifier language) {
|
||||||
|
this.language = language;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Country> getCountry() {
|
||||||
|
return country;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCountry(List<Country> country) {
|
||||||
|
this.country = country;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Subject> getSubjects() {
|
||||||
|
return subjects;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubjects(List<Subject> subjects) {
|
||||||
|
this.subjects = subjects;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getMaintitle() {
|
||||||
|
return maintitle;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaintitle(String maintitle) {
|
||||||
|
this.maintitle = maintitle;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSubtitle() {
|
||||||
|
return subtitle;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubtitle(String subtitle) {
|
||||||
|
this.subtitle = subtitle;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getDescription() {
|
||||||
|
return description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDescription(List<String> description) {
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPublicationdate() {
|
||||||
|
return publicationdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPublicationdate(String publicationdate) {
|
||||||
|
this.publicationdate = publicationdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPublisher() {
|
||||||
|
return publisher;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPublisher(String publisher) {
|
||||||
|
this.publisher = publisher;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEmbargoenddate() {
|
||||||
|
return embargoenddate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEmbargoenddate(String embargoenddate) {
|
||||||
|
this.embargoenddate = embargoenddate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getSource() {
|
||||||
|
return source;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSource(List<String> source) {
|
||||||
|
this.source = source;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getFormat() {
|
||||||
|
return format;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFormat(List<String> format) {
|
||||||
|
this.format = format;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getContributor() {
|
||||||
|
return contributor;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContributor(List<String> contributor) {
|
||||||
|
this.contributor = contributor;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getCoverage() {
|
||||||
|
return coverage;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCoverage(List<String> coverage) {
|
||||||
|
this.coverage = coverage;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AccessRight getBestaccessright() {
|
||||||
|
return bestaccessright;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBestaccessright(AccessRight bestaccessright) {
|
||||||
|
this.bestaccessright = bestaccessright;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Instance> getInstance() {
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setInstance(List<Instance> instance) {
|
||||||
|
this.instance = instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getDocumentationUrl() {
|
||||||
|
return documentationUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDocumentationUrl(List<String> documentationUrl) {
|
||||||
|
this.documentationUrl = documentationUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCodeRepositoryUrl() {
|
||||||
|
return codeRepositoryUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCodeRepositoryUrl(String codeRepositoryUrl) {
|
||||||
|
this.codeRepositoryUrl = codeRepositoryUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProgrammingLanguage() {
|
||||||
|
return programmingLanguage;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProgrammingLanguage(String programmingLanguage) {
|
||||||
|
this.programmingLanguage = programmingLanguage;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getContactperson() {
|
||||||
|
return contactperson;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContactperson(List<String> contactperson) {
|
||||||
|
this.contactperson = contactperson;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getContactgroup() {
|
||||||
|
return contactgroup;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContactgroup(List<String> contactgroup) {
|
||||||
|
this.contactgroup = contactgroup;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getTool() {
|
||||||
|
return tool;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTool(List<String> tool) {
|
||||||
|
this.tool = tool;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSize() {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSize(String size) {
|
||||||
|
this.size = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getVersion() {
|
||||||
|
return version;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setVersion(String version) {
|
||||||
|
this.version = version;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<GeoLocation> getGeolocation() {
|
||||||
|
return geolocation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setGeolocation(List<GeoLocation> geolocation) {
|
||||||
|
this.geolocation = geolocation;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To represent keywords associated to the result. It has two parameters: - subject of type
|
||||||
|
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to describe the subject. It is mapped as: - schema it corresponds to
|
||||||
|
* qualifier.classid of the dumped subject - value it corresponds to the subject value - provenance of type
|
||||||
|
* eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo
|
||||||
|
* is not null. In this case: - provenance corresponds to dataInfo.provenanceaction.classname - trust corresponds to
|
||||||
|
* dataInfo.trust
|
||||||
|
*/
|
||||||
|
public class Subject implements Serializable {
|
||||||
|
private ControlledField subject;
|
||||||
|
private Provenance provenance;
|
||||||
|
|
||||||
|
public ControlledField getSubject() {
|
||||||
|
return subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubject(ControlledField subject) {
|
||||||
|
this.subject = subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Provenance getProvenance() {
|
||||||
|
return provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProvenance(Provenance provenance) {
|
||||||
|
this.provenance = provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Result;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type
|
||||||
|
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Project> to store the list of projects related to the result. The
|
||||||
|
* information is added after the result is mapped to the external model - context of type
|
||||||
|
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Context> to store information about the RC RI related to the result.
|
||||||
|
* For each context in the result represented in the internal model one context in the external model is produced -
|
||||||
|
* collectedfrom of type List<eu.dnetlib.dhp.schema.dump.oaf.KeyValue> to store information about the sources from which
|
||||||
|
* the record has been collected. For each collectedfrom in the result represented in the internal model one
|
||||||
|
* collectedfrom in the external model is produced
|
||||||
|
*/
|
||||||
|
public class CommunityResult extends Result {
|
||||||
|
|
||||||
|
private List<Project> projects;
|
||||||
|
|
||||||
|
private List<Context> context;
|
||||||
|
|
||||||
|
protected List<KeyValue> collectedfrom;
|
||||||
|
|
||||||
|
public List<KeyValue> getCollectedfrom() {
|
||||||
|
return collectedfrom;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCollectedfrom(List<KeyValue> collectedfrom) {
|
||||||
|
this.collectedfrom = collectedfrom;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Project> getProjects() {
|
||||||
|
return projects;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProjects(List<Project> projects) {
|
||||||
|
this.projects = projects;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Context> getContext() {
|
||||||
|
return context;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContext(List<Context> context) {
|
||||||
|
this.context = context;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with
|
||||||
|
* OpenAIRE. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a parameter provenance of type
|
||||||
|
* List<eu.dnetlib.dhp.schema.dump.oaf.Provenance> to store the provenances of the association between the result and
|
||||||
|
* the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result
|
||||||
|
* to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::"
|
||||||
|
* will be used as value for code - label it corresponds to the label associated to the id. The information is taken
|
||||||
|
* from the profile of the RC/RI - provenance it is set only if the dataInfo associated to the context element of the
|
||||||
|
* result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is
|
||||||
|
* instantiated if the element datainfo.provenanceaction is not null. In this case - provenance corresponds to
|
||||||
|
* dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust
|
||||||
|
*/
|
||||||
|
public class Context extends Qualifier {
|
||||||
|
private List<Provenance> provenance;
|
||||||
|
|
||||||
|
public List<Provenance> getProvenance() {
|
||||||
|
return provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProvenance(List<Provenance> provenance) {
|
||||||
|
this.provenance = provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
String provenance = new String();
|
||||||
|
this.provenance.forEach(p -> provenance.concat(p.toString()));
|
||||||
|
return Objects.hash(getCode(), getLabel(), provenance);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,52 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To store information about the funder funding the project related to the result. It has the following parameters: -
|
||||||
|
* shortName of type String to store the funder short name (e.g. AKA). - name of type String to store the funder name
|
||||||
|
* (e.g. Academy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to
|
||||||
|
* store the jurisdiction of the funder
|
||||||
|
*/
|
||||||
|
public class Funder implements Serializable {

    /** Short name of the funder. */
    private String shortName;

    /** Name of the funder. */
    private String name;

    /** Funding stream. */
    private String fundingStream;

    /** Jurisdiction of the funder. */
    private String jurisdiction;

    /**
     * @return the jurisdiction of the funder
     */
    public String getJurisdiction() {
        return this.jurisdiction;
    }

    /**
     * @param jurisdiction the jurisdiction to store
     */
    public void setJurisdiction(String jurisdiction) {
        this.jurisdiction = jurisdiction;
    }

    /**
     * @return the short name of the funder
     */
    public String getShortName() {
        return this.shortName;
    }

    /**
     * @param shortName the short name to store
     */
    public void setShortName(String shortName) {
        this.shortName = shortName;
    }

    /**
     * @return the name of the funder
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the name to store
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the funding stream
     */
    public String getFundingStream() {
        return this.fundingStream;
    }

    /**
     * @param fundingStream the funding stream to store
     */
    public void setFundingStream(String fundingStream) {
        this.fundingStream = fundingStream;
    }
}
|
|
@ -0,0 +1,88 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To store information about the project related to the result. This information is not directly mapped from the result
|
||||||
|
* represented in the internal model because it is not there. The mapped result will be enriched with project
|
||||||
|
* information derived by relation between results and projects. Project class has the following parameters: - id of
|
||||||
|
* type String to store the OpenAIRE id for the Project - code of type String to store the grant agreement - acronym of
|
||||||
|
* type String to store the acronym for the project - title of type String to store the title of the project - funder of
|
||||||
|
* type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information about the funder funding the project -
|
||||||
|
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store information about the provenance of the
|
||||||
|
* association between the result and the project
|
||||||
|
*/
|
||||||
|
public class Project implements Serializable {
|
||||||
|
|
||||||
|
private String id;// OpenAIRE id
|
||||||
|
private String code;
|
||||||
|
|
||||||
|
private String acronym;
|
||||||
|
|
||||||
|
private String title;
|
||||||
|
|
||||||
|
private Funder funder;
|
||||||
|
|
||||||
|
private Provenance provenance;
|
||||||
|
|
||||||
|
public Provenance getProvenance() {
|
||||||
|
return provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProvenance(Provenance provenance) {
|
||||||
|
this.provenance = provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCode() {
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCode(String code) {
|
||||||
|
this.code = code;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAcronym() {
|
||||||
|
return acronym;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAcronym(String acronym) {
|
||||||
|
this.acronym = acronym;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTitle() {
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTitle(String title) {
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Funder getFunder() {
|
||||||
|
return funder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFunder(Funder funders) {
|
||||||
|
this.funder = funders;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
|
||||||
|
Project project = new Project();
|
||||||
|
project.setAcronym(acronym);
|
||||||
|
project.setCode(code);
|
||||||
|
project.setFunder(funder);
|
||||||
|
project.setId(id);
|
||||||
|
project.setTitle(title);
|
||||||
|
return project;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class Constants implements Serializable {

    // collectedFrom goes together with isProvidedBy -> taken from ModelSupport

    /** Relation name: isHostedBy. */
    public static final String HOSTED_BY = "isHostedBy";

    /** Relation name: hosts. */
    public static final String HOSTS = "hosts";

    // for community results isRelatedTo is used

    /** Entity type name for results. */
    public static final String RESULT_ENTITY = "result";

    /** Entity type name for datasources. */
    public static final String DATASOURCE_ENTITY = "datasource";

    /** Entity type name for contexts. */
    public static final String CONTEXT_ENTITY = "context";

    /** Id value used for contexts. */
    public static final String CONTEXT_ID = "60";

    /** Namespace prefix used for contexts. */
    public static final String CONTEXT_NS_PREFIX = "context____";

}
|
|
@ -0,0 +1,316 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Container;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
|
||||||
|
* id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource
|
||||||
|
* represented in the internal model - originalId of type List<String> to store the list of original ids associated to
|
||||||
|
* the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The
|
||||||
|
* null values are filtered out - pid of type List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the
|
||||||
|
* persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid
|
||||||
|
* in the external model is produced as : - schema corresponds to pid.qualifier.classid of the datasource represented in
|
||||||
|
* the internal model - value corresponds to pid.value of the datasource represented in the internal model -
|
||||||
|
* datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g.
|
||||||
|
* pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It
|
||||||
|
* corresponds to datasourcetype of the datasource represented in the internal model and : - code corresponds to
|
||||||
|
* datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to
|
||||||
|
* store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to).
|
||||||
|
* It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname
|
||||||
|
* of type String to store the official name of the datasource. It corresponds to officialname.value of the datasource
|
||||||
|
* represented in the internal model - englishname of type String to store the English name of the datasource. It
|
||||||
|
* corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to
|
||||||
|
* store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in
|
||||||
|
* the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to
|
||||||
|
* logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the date
|
||||||
|
* of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the
|
||||||
|
* datasource represented in the internal model - description of type String to store the description for the
|
||||||
|
* datasource. It corresponds to description.value of the datasource represented in the internal model
|
||||||
|
*/
|
||||||
|
public class Datasource implements Serializable {
|
||||||
|
|
||||||
|
private String id; // string
|
||||||
|
|
||||||
|
private List<String> originalId; // list string
|
||||||
|
|
||||||
|
private List<ControlledField> pid; // list<String>
|
||||||
|
|
||||||
|
private ControlledField datasourcetype; // value
|
||||||
|
|
||||||
|
private String openairecompatibility; // value
|
||||||
|
|
||||||
|
private String officialname; // string
|
||||||
|
|
||||||
|
private String englishname; // string
|
||||||
|
|
||||||
|
private String websiteurl; // string
|
||||||
|
|
||||||
|
private String logourl; // string
|
||||||
|
|
||||||
|
private String dateofvalidation; // string
|
||||||
|
|
||||||
|
private String description; // description
|
||||||
|
|
||||||
|
private List<String> subjects; // List<String>
|
||||||
|
|
||||||
|
// opendoar specific fields (od*)
|
||||||
|
|
||||||
|
private List<String> languages; // odlanguages List<String>
|
||||||
|
|
||||||
|
private List<String> contenttypes; // odcontent types List<String>
|
||||||
|
|
||||||
|
// re3data fields
|
||||||
|
private String releasestartdate; // string
|
||||||
|
|
||||||
|
private String releaseenddate; // string
|
||||||
|
|
||||||
|
private String missionstatementurl; // string
|
||||||
|
|
||||||
|
// {open, restricted or closed}
|
||||||
|
private String accessrights; // databaseaccesstype string
|
||||||
|
|
||||||
|
// {open, restricted or closed}
|
||||||
|
private String uploadrights; // datauploadtype string
|
||||||
|
|
||||||
|
// {feeRequired, registration, other}
|
||||||
|
private String databaseaccessrestriction; // string
|
||||||
|
|
||||||
|
// {feeRequired, registration, other}
|
||||||
|
private String datauploadrestriction; // string
|
||||||
|
|
||||||
|
private Boolean versioning; // boolean
|
||||||
|
|
||||||
|
private String citationguidelineurl; // string
|
||||||
|
|
||||||
|
// {yes, no, uknown}
|
||||||
|
|
||||||
|
private String pidsystems; // string
|
||||||
|
|
||||||
|
private String certificates; // string
|
||||||
|
|
||||||
|
private List<Object> policies; //
|
||||||
|
|
||||||
|
private Container journal; // issn etc del Journal
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getOriginalId() {
|
||||||
|
return originalId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOriginalId(List<String> originalId) {
|
||||||
|
this.originalId = originalId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<ControlledField> getPid() {
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPid(List<ControlledField> pid) {
|
||||||
|
this.pid = pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ControlledField getDatasourcetype() {
|
||||||
|
return datasourcetype;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDatasourcetype(ControlledField datasourcetype) {
|
||||||
|
this.datasourcetype = datasourcetype;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOpenairecompatibility() {
|
||||||
|
return openairecompatibility;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOpenairecompatibility(String openairecompatibility) {
|
||||||
|
this.openairecompatibility = openairecompatibility;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOfficialname() {
|
||||||
|
return officialname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOfficialname(String officialname) {
|
||||||
|
this.officialname = officialname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEnglishname() {
|
||||||
|
return englishname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEnglishname(String englishname) {
|
||||||
|
this.englishname = englishname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getWebsiteurl() {
|
||||||
|
return websiteurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setWebsiteurl(String websiteurl) {
|
||||||
|
this.websiteurl = websiteurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLogourl() {
|
||||||
|
return logourl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLogourl(String logourl) {
|
||||||
|
this.logourl = logourl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDateofvalidation() {
|
||||||
|
return dateofvalidation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDateofvalidation(String dateofvalidation) {
|
||||||
|
this.dateofvalidation = dateofvalidation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDescription() {
|
||||||
|
return description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDescription(String description) {
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getSubjects() {
|
||||||
|
return subjects;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubjects(List<String> subjects) {
|
||||||
|
this.subjects = subjects;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getLanguages() {
|
||||||
|
return languages;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLanguages(List<String> languages) {
|
||||||
|
this.languages = languages;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getContenttypes() {
|
||||||
|
return contenttypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContenttypes(List<String> contenttypes) {
|
||||||
|
this.contenttypes = contenttypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getReleasestartdate() {
|
||||||
|
return releasestartdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setReleasestartdate(String releasestartdate) {
|
||||||
|
this.releasestartdate = releasestartdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getReleaseenddate() {
|
||||||
|
return releaseenddate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setReleaseenddate(String releaseenddate) {
|
||||||
|
this.releaseenddate = releaseenddate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getMissionstatementurl() {
|
||||||
|
return missionstatementurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMissionstatementurl(String missionstatementurl) {
|
||||||
|
this.missionstatementurl = missionstatementurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAccessrights() {
|
||||||
|
return accessrights;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAccessrights(String accessrights) {
|
||||||
|
this.accessrights = accessrights;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getUploadrights() {
|
||||||
|
return uploadrights;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUploadrights(String uploadrights) {
|
||||||
|
this.uploadrights = uploadrights;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDatabaseaccessrestriction() {
|
||||||
|
return databaseaccessrestriction;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDatabaseaccessrestriction(String databaseaccessrestriction) {
|
||||||
|
this.databaseaccessrestriction = databaseaccessrestriction;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDatauploadrestriction() {
|
||||||
|
return datauploadrestriction;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDatauploadrestriction(String datauploadrestriction) {
|
||||||
|
this.datauploadrestriction = datauploadrestriction;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Boolean getVersioning() {
|
||||||
|
return versioning;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setVersioning(Boolean versioning) {
|
||||||
|
this.versioning = versioning;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCitationguidelineurl() {
|
||||||
|
return citationguidelineurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCitationguidelineurl(String citationguidelineurl) {
|
||||||
|
this.citationguidelineurl = citationguidelineurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPidsystems() {
|
||||||
|
return pidsystems;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPidsystems(String pidsystems) {
|
||||||
|
this.pidsystems = pidsystems;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCertificates() {
|
||||||
|
return certificates;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCertificates(String certificates) {
|
||||||
|
this.certificates = certificates;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Object> getPolicies() {
|
||||||
|
return policies;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPolicies(List<Object> policiesr3) {
|
||||||
|
this.policies = policiesr3;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Container getJournal() {
|
||||||
|
return journal;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setJournal(Container journal) {
|
||||||
|
this.journal = journal;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To store information about the funder funding the project related to the result. It has the following parameters:
|
||||||
|
* - private String shortName to store the short name of the funder (e.g. AKA)
|
||||||
|
* - private String name to store information about the name of the funder (e.g. Akademy of Finland)
|
||||||
|
* - private Fundings funding_stream to store the fundingstream
|
||||||
|
* - private String jurisdiction to store information about the jurisdiction of the funder
|
||||||
|
*/
|
||||||
|
public class Funder implements Serializable {
|
||||||
|
|
||||||
|
private String shortName;
|
||||||
|
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
private Fundings funding_stream;
|
||||||
|
|
||||||
|
private String jurisdiction;
|
||||||
|
|
||||||
|
public String getShortName() {
|
||||||
|
return shortName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setShortName(String shortName) {
|
||||||
|
this.shortName = shortName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setName(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getJurisdiction() {
|
||||||
|
return jurisdiction;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setJurisdiction(String jurisdiction) {
|
||||||
|
this.jurisdiction = jurisdiction;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Fundings getFunding_stream() {
|
||||||
|
return funding_stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFunding_stream(Fundings funding_stream) {
|
||||||
|
this.funding_stream = funding_stream;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Information about the funding stream.
 * <ul>
 * <li>{@code id}: the id of the funding stream. It is created by appending the short name of the funder to the name
 * of each level in the xml representing the funding stream. For example, if the funder is the European Commission,
 * the funding level 0 name is FP7, the funding level 1 name is SP3 and the funding level 2 name is PEOPLE, then the
 * id will be {@code EC::FP7::SP3::PEOPLE}</li>
 * <li>{@code description}: describes the funding stream. It is created by concatenating the description of each
 * funding level, so for the example above the description would be
 * {@code SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions}</li>
 * </ul>
 */
public class Fundings implements Serializable {

    private String id;
    private String description;

    /** @return the id of the funding stream */
    public String getId() {
        return this.id;
    }

    /** @param id the id of the funding stream */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the description of the funding stream */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description of the funding stream */
    public void setDescription(final String description) {
        this.description = description;
    }
}
|
|
@ -0,0 +1,56 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
/**
 * Describes the funded amount.
 * <ul>
 * <li>{@code currency}: the currency of the fund</li>
 * <li>{@code totalcost}: the total cost of the project</li>
 * <li>{@code fundedamount}: the amount funded by the funder</li>
 * </ul>
 */
public class Granted implements Serializable {

    private String currency;
    private float totalcost;
    private float fundedamount;

    /**
     * Builds an instance with every value set.
     *
     * @param currency the currency of the fund
     * @param totalcost the total cost of the project
     * @param fundedamount the amount funded by the funder
     * @return the populated instance
     */
    public static Granted newInstance(String currency, float totalcost, float fundedamount) {
        final Granted result = new Granted();
        result.setCurrency(currency);
        result.setTotalcost(totalcost);
        result.setFundedamount(fundedamount);
        return result;
    }

    /**
     * Builds an instance without a total cost; {@code totalcost} keeps its default value.
     *
     * @param currency the currency of the fund
     * @param fundedamount the amount funded by the funder
     * @return the populated instance
     */
    public static Granted newInstance(String currency, float fundedamount) {
        final Granted result = new Granted();
        result.setCurrency(currency);
        result.setFundedamount(fundedamount);
        return result;
    }

    /** @return the currency of the fund */
    public String getCurrency() {
        return this.currency;
    }

    /** @param currency the currency of the fund */
    public void setCurrency(final String currency) {
        this.currency = currency;
    }

    /** @return the total cost of the project */
    public float getTotalcost() {
        return this.totalcost;
    }

    /** @param totalcost the total cost of the project */
    public void setTotalcost(final float totalcost) {
        this.totalcost = totalcost;
    }

    /** @return the amount funded by the funder */
    public float getFundedamount() {
        return this.fundedamount;
    }

    /** @param fundedamount the amount funded by the funder */
    public void setFundedamount(final float fundedamount) {
        this.fundedamount = fundedamount;
    }
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Represents the generic node in a relation.
 * <ul>
 * <li>{@code id}: the openaire id of the entity in the relation</li>
 * <li>{@code type}: the type of the entity in the relation</li>
 * </ul>
 * Consider the generic relation between a Result R and a Project P: the node representing R will have as id the id
 * of R and as type result, while the node representing the project will have as id the id of the project and as
 * type project.
 */
public class Node implements Serializable {

    private String id;
    private String type;

    /**
     * Builds a node with both values set.
     *
     * @param id the openaire id of the entity
     * @param type the type of the entity
     * @return the populated node
     */
    public static Node newInstance(String id, String type) {
        final Node result = new Node();
        result.setId(id);
        result.setType(type);
        return result;
    }

    /** @return the openaire id of the entity */
    public String getId() {
        return this.id;
    }

    /** @param id the openaire id of the entity */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the type of the entity */
    public String getType() {
        return this.type;
    }

    /** @param type the type of the entity */
    public void setType(final String type) {
        this.type = type;
    }
}
|
|
@ -0,0 +1,89 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Country;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To represent the generic organizaiton. It has the following parameters:
|
||||||
|
* - private String legalshortname to store the legalshortname of the organizaiton
|
||||||
|
* - private String legalname to store the legal name of the organization
|
||||||
|
* - private String websiteurl to store the websiteurl of the organization
|
||||||
|
* - private List<String> alternativenames to store the alternative names of the organization
|
||||||
|
* - private Qualifier country to store the country of the organization
|
||||||
|
* - private String id to store the id of the organization
|
||||||
|
* - private List<ControlledField> pid to store the list of pids for the organization
|
||||||
|
*/
|
||||||
|
public class Organization implements Serializable {
|
||||||
|
private String legalshortname;
|
||||||
|
private String legalname;
|
||||||
|
private String websiteurl;
|
||||||
|
private List<String> alternativenames;
|
||||||
|
private Qualifier country;
|
||||||
|
private String id;
|
||||||
|
private List<ControlledField> pid;
|
||||||
|
|
||||||
|
public String getLegalshortname() {
|
||||||
|
return legalshortname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLegalshortname(String legalshortname) {
|
||||||
|
this.legalshortname = legalshortname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLegalname() {
|
||||||
|
return legalname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLegalname(String legalname) {
|
||||||
|
this.legalname = legalname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getWebsiteurl() {
|
||||||
|
return websiteurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setWebsiteurl(String websiteurl) {
|
||||||
|
this.websiteurl = websiteurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getAlternativenames() {
|
||||||
|
return alternativenames;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAlternativenames(List<String> alternativenames) {
|
||||||
|
this.alternativenames = alternativenames;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier getCountry() {
|
||||||
|
return country;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCountry(Qualifier country) {
|
||||||
|
this.country = country;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<ControlledField> getPid() {
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPid(List<ControlledField> pid) {
|
||||||
|
this.pid = pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Information about the ec programme for the project.
 * <ul>
 * <li>{@code code}: the code of the programme</li>
 * <li>{@code description}: the description of the programme</li>
 * </ul>
 */
public class Programme implements Serializable {

    private String code;
    private String description;

    /**
     * Builds a programme with both values set.
     *
     * @param code the code of the programme
     * @param description the description of the programme
     * @return the populated programme
     */
    public static Programme newInstance(String code, String description) {
        final Programme result = new Programme();
        result.setCode(code);
        result.setDescription(description);
        return result;
    }

    /** @return the code of the programme */
    public String getCode() {
        return this.code;
    }

    /** @param code the code of the programme */
    public void setCode(final String code) {
        this.code = code;
    }

    /** @return the description of the programme */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description of the programme */
    public void setDescription(final String description) {
        this.description = description;
    }
}
|
|
@ -0,0 +1,195 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump
|
||||||
|
* of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and
|
||||||
|
* Projects but we put the information about the Funder within the Project representation. We also removed the
|
||||||
|
* collected from element from the Project. No relation between the Project and the Datasource entity from which it is
|
||||||
|
* collected will be created. We will never create relations between Project and Datasource. In case some relation will
|
||||||
|
* be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project,
|
||||||
|
* project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to
|
||||||
|
* 0. It has the following parameters:
|
||||||
|
* - private String id to store the id of the project (OpenAIRE id)
|
||||||
|
* - private String websiteurl to store the websiteurl of the project
|
||||||
|
* - private String code to store the grant agreement of the project
|
||||||
|
* - private String acronym to store the acronym of the project
|
||||||
|
* - private String title to store the tile of the project
|
||||||
|
* - private String startdate to store the start date
|
||||||
|
* - private String enddate to store the end date
|
||||||
|
* - private String callidentifier to store the call indentifier
|
||||||
|
* - private String keywords to store the keywords
|
||||||
|
* - private boolean openaccessmandateforpublications to store if the project must accomplish to the open access mandate
|
||||||
|
* for publications. This value will be set to true if one of the field in the project represented in the internal model
|
||||||
|
* is set to true
|
||||||
|
* - private boolean openaccessmandatefordataset to store if the project must accomplish to the open access mandate for
|
||||||
|
* dataset. It is set to the value in the corresponding filed of the project represented in the internal model
|
||||||
|
* - private List<String> subject to store the list of subjects of the project
|
||||||
|
* - private List<Funder> funding to store the list of funder of the project
|
||||||
|
* - private String summary to store the summary of the project
|
||||||
|
* - private Granted granted to store the granted amount
|
||||||
|
* - private List<Programme> programme to store the list of programmes the project is related to
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class Project implements Serializable {
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
private String websiteurl;
|
||||||
|
private String code;
|
||||||
|
private String acronym;
|
||||||
|
private String title;
|
||||||
|
private String startdate;
|
||||||
|
|
||||||
|
private String enddate;
|
||||||
|
|
||||||
|
private String callidentifier;
|
||||||
|
|
||||||
|
private String keywords;
|
||||||
|
|
||||||
|
private boolean openaccessmandateforpublications;
|
||||||
|
|
||||||
|
private boolean openaccessmandatefordataset;
|
||||||
|
private List<String> subject;
|
||||||
|
|
||||||
|
private List<Funder> funding;
|
||||||
|
|
||||||
|
private String summary;
|
||||||
|
|
||||||
|
private Granted granted;
|
||||||
|
|
||||||
|
private List<Programme> programme;
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getWebsiteurl() {
|
||||||
|
return websiteurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setWebsiteurl(String websiteurl) {
|
||||||
|
this.websiteurl = websiteurl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCode() {
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCode(String code) {
|
||||||
|
this.code = code;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAcronym() {
|
||||||
|
return acronym;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAcronym(String acronym) {
|
||||||
|
this.acronym = acronym;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTitle() {
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTitle(String title) {
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStartdate() {
|
||||||
|
return startdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStartdate(String startdate) {
|
||||||
|
this.startdate = startdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEnddate() {
|
||||||
|
return enddate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEnddate(String enddate) {
|
||||||
|
this.enddate = enddate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCallidentifier() {
|
||||||
|
return callidentifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCallidentifier(String callidentifier) {
|
||||||
|
this.callidentifier = callidentifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getKeywords() {
|
||||||
|
return keywords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setKeywords(String keywords) {
|
||||||
|
this.keywords = keywords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isOpenaccessmandateforpublications() {
|
||||||
|
return openaccessmandateforpublications;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) {
|
||||||
|
this.openaccessmandateforpublications = openaccessmandateforpublications;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isOpenaccessmandatefordataset() {
|
||||||
|
return openaccessmandatefordataset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) {
|
||||||
|
this.openaccessmandatefordataset = openaccessmandatefordataset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getSubject() {
|
||||||
|
return subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubject(List<String> subject) {
|
||||||
|
this.subject = subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Funder> getFunding() {
|
||||||
|
return funding;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFunding(List<Funder> funding) {
|
||||||
|
this.funding = funding;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSummary() {
|
||||||
|
return summary;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSummary(String summary) {
|
||||||
|
this.summary = summary;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Granted getGranted() {
|
||||||
|
return granted;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setGranted(Granted granted) {
|
||||||
|
this.granted = granted;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Programme> getProgramme() {
|
||||||
|
return programme;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProgramme(List<Programme> programme) {
|
||||||
|
this.programme = programme;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Represents the semantics of the generic relation between two entities.
 * <ul>
 * <li>{@code name}: the semantics of the relation (i.e. isAuthorInstitutionOf). It corresponds to the relclass
 * parameter in the relation represented in the internal model</li>
 * <li>{@code type}: the type of the relation (i.e. affiliation). It corresponds to the subreltype parameter of the
 * relation represented in the internal model</li>
 * </ul>
 */
public class RelType implements Serializable {

    // relclass
    private String name;

    // subreltype
    private String type;

    /**
     * Builds a relation type with both values set.
     *
     * @param name the semantics of the relation (relclass)
     * @param type the type of the relation (subreltype)
     * @return the populated relation type
     */
    public static RelType newInstance(String name, String type) {
        final RelType result = new RelType();
        result.setName(name);
        result.setType(type);
        return result;
    }

    /** @return the semantics of the relation (relclass) */
    public String getName() {
        return this.name;
    }

    /** @param name the semantics of the relation (relclass) */
    public void setName(final String name) {
        this.name = name;
    }

    /** @return the type of the relation (subreltype) */
    public String getType() {
        return this.type;
    }

    /** @param type the type of the relation (subreltype) */
    public void setType(final String type) {
        this.type = type;
    }
}
|
|
@ -0,0 +1,68 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To represent the gereric relation between two entities. It has the following parameters:
|
||||||
|
* - private Node source to represent the entity source of the relation
|
||||||
|
* - private Node target to represent the entity target of the relation
|
||||||
|
* - private RelType reltype to represent the semantics of the relation
|
||||||
|
* - private Provenance provenance to represent the provenance of the relation
|
||||||
|
*/
|
||||||
|
public class Relation implements Serializable {
|
||||||
|
private Node source;
|
||||||
|
private Node target;
|
||||||
|
private RelType reltype;
|
||||||
|
private Provenance provenance;
|
||||||
|
|
||||||
|
public Node getSource() {
|
||||||
|
return source;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSource(Node source) {
|
||||||
|
this.source = source;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Node getTarget() {
|
||||||
|
return target;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTarget(Node target) {
|
||||||
|
this.target = target;
|
||||||
|
}
|
||||||
|
|
||||||
|
public RelType getReltype() {
|
||||||
|
return reltype;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setReltype(RelType reltype) {
|
||||||
|
this.reltype = reltype;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Provenance getProvenance() {
|
||||||
|
return provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProvenance(Provenance provenance) {
|
||||||
|
this.provenance = provenance;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
|
||||||
|
return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) {
|
||||||
|
Relation relation = new Relation();
|
||||||
|
relation.source = source;
|
||||||
|
relation.target = target;
|
||||||
|
relation.reltype = reltype;
|
||||||
|
relation.provenance = provenance;
|
||||||
|
return relation;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To represent RC entities. It extends eu.dnetlib.dhp.dump.oaf.grap.ResearchInitiative by adding the parameter subject
|
||||||
|
* to store the list of subjects related to the community
|
||||||
|
*/
|
||||||
|
public class ResearchCommunity extends ResearchInitiative {
|
||||||
|
private List<String> subject;
|
||||||
|
|
||||||
|
public List<String> getSubject() {
|
||||||
|
return subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubject(List<String> subject) {
|
||||||
|
this.subject = subject;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,71 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Represents an entity of type RC/RI. Its parameters are mostly derived from the profile:
 * <ul>
 * <li>{@code id}: the openaire id for the entity. It has 00 as code and will be created as
 * {@code 00|context_____::md5(originalId)}</li>
 * <li>{@code originalId}: the id of the context as provided in the profile (i.e. mes)</li>
 * <li>{@code name}: the name of the context (got from the label attribute in the context definition)</li>
 * <li>{@code type}: the type of the context (i.e.: research initiative or research community)</li>
 * <li>{@code description}: the description of the context as given in the profile</li>
 * <li>{@code zenodo_community}: the zenodo community associated to the context (main zenodo community)</li>
 * </ul>
 */
public class ResearchInitiative implements Serializable {

    // openaireId
    private String id;

    // context id
    private String originalId;

    // context name
    private String name;

    // context type: research initiative or research community
    private String type;

    private String description;

    private String zenodo_community;

    /** @return the openaire id for the entity */
    public String getId() {
        return this.id;
    }

    /** @param id the openaire id for the entity */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the id of the context as provided in the profile */
    public String getOriginalId() {
        return this.originalId;
    }

    /** @param originalId the id of the context as provided in the profile */
    public void setOriginalId(final String originalId) {
        this.originalId = originalId;
    }

    /** @return the name of the context */
    public String getName() {
        return this.name;
    }

    /** @param label the name of the context */
    public void setName(final String label) {
        this.name = label;
    }

    /** @return the type of the context */
    public String getType() {
        return this.type;
    }

    /** @param type the type of the context */
    public void setType(final String type) {
        this.type = type;
    }

    /** @return the description of the context */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description of the context */
    public void setDescription(final String description) {
        this.description = description;
    }

    /** @return the zenodo community associated to the context */
    public String getZenodo_community() {
        return this.zenodo_community;
    }

    /** @param zenodo_community the zenodo community associated to the context */
    public void setZenodo_community(final String zenodo_community) {
        this.zenodo_community = zenodo_community;
    }
}
|
|
@ -110,13 +110,6 @@ public class CommunityConfigurationFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
|
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
|
||||||
// final Node oacommunitynode = node.selectSingleNode("./oacommunity");
|
|
||||||
// String oacommunity = null;
|
|
||||||
// if (oacommunitynode != null) {
|
|
||||||
// String tmp = oacommunitynode.getText();
|
|
||||||
// if (StringUtils.isNotBlank(tmp))
|
|
||||||
// oacommunity = tmp;
|
|
||||||
// }
|
|
||||||
|
|
||||||
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
|
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
|
||||||
final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>();
|
final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>();
|
||||||
|
@ -127,11 +120,7 @@ public class CommunityConfigurationFactory {
|
||||||
|
|
||||||
zenodoCommunityList.add(zc);
|
zenodoCommunityList.add(zc);
|
||||||
}
|
}
|
||||||
// if (oacommunity != null) {
|
|
||||||
// ZenodoCommunity zc = new ZenodoCommunity();
|
|
||||||
// zc.setZenodoCommunityId(oacommunity);
|
|
||||||
// zenodoCommunityList.add(zc);
|
|
||||||
// }
|
|
||||||
log.info("size of the zenodo community list " + zenodoCommunityList.size());
|
log.info("size of the zenodo community list " + zenodoCommunityList.size());
|
||||||
return zenodoCommunityList;
|
return zenodoCommunityList;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,8 +20,6 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
/** Created by miriam on 02/08/2018. */
|
/** Created by miriam on 02/08/2018. */
|
||||||
public class ResultTagger implements Serializable {
|
public class ResultTagger implements Serializable {
|
||||||
|
|
||||||
private String trust = "0.8";
|
|
||||||
|
|
||||||
private boolean clearContext(Result result) {
|
private boolean clearContext(Result result) {
|
||||||
int tmp = result.getContext().size();
|
int tmp = result.getContext().size();
|
||||||
List<Context> clist = result
|
List<Context> clist = result
|
||||||
|
@ -72,8 +70,9 @@ public class ResultTagger implements Serializable {
|
||||||
// tagging for Subject
|
// tagging for Subject
|
||||||
final Set<String> subjects = new HashSet<>();
|
final Set<String> subjects = new HashSet<>();
|
||||||
|
|
||||||
if (Objects.nonNull(result.getSubject())){
|
if (Objects.nonNull(result.getSubject())) {
|
||||||
result.getSubject()
|
result
|
||||||
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
.map(subject -> subject.getValue())
|
.map(subject -> subject.getValue())
|
||||||
.filter(StringUtils::isNotBlank)
|
.filter(StringUtils::isNotBlank)
|
||||||
|
@ -91,13 +90,13 @@ public class ResultTagger implements Serializable {
|
||||||
|
|
||||||
if (Objects.nonNull(result.getInstance())) {
|
if (Objects.nonNull(result.getInstance())) {
|
||||||
for (Instance i : result.getInstance()) {
|
for (Instance i : result.getInstance()) {
|
||||||
if(Objects.nonNull(i.getCollectedfrom())){
|
if (Objects.nonNull(i.getCollectedfrom())) {
|
||||||
if(Objects.nonNull(i.getCollectedfrom().getKey())){
|
if (Objects.nonNull(i.getCollectedfrom().getKey())) {
|
||||||
tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
|
tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(Objects.nonNull(i.getHostedby())){
|
if (Objects.nonNull(i.getHostedby())) {
|
||||||
if(Objects.nonNull(i.getHostedby().getKey())){
|
if (Objects.nonNull(i.getHostedby().getKey())) {
|
||||||
tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
|
tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -170,21 +169,24 @@ public class ResultTagger implements Serializable {
|
||||||
getDataInfo(
|
getDataInfo(
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
BULKTAG_DATA_INFO_TYPE,
|
||||||
CLASS_ID_SUBJECT,
|
CLASS_ID_SUBJECT,
|
||||||
CLASS_NAME_BULKTAG_SUBJECT));
|
CLASS_NAME_BULKTAG_SUBJECT,
|
||||||
|
TAGGING_TRUST));
|
||||||
if (datasources.contains(c.getId()))
|
if (datasources.contains(c.getId()))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
getDataInfo(
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
BULKTAG_DATA_INFO_TYPE,
|
||||||
CLASS_ID_DATASOURCE,
|
CLASS_ID_DATASOURCE,
|
||||||
CLASS_NAME_BULKTAG_DATASOURCE));
|
CLASS_NAME_BULKTAG_DATASOURCE,
|
||||||
|
TAGGING_TRUST));
|
||||||
if (czenodo.contains(c.getId()))
|
if (czenodo.contains(c.getId()))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
getDataInfo(
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
BULKTAG_DATA_INFO_TYPE,
|
||||||
CLASS_ID_CZENODO,
|
CLASS_ID_CZENODO,
|
||||||
CLASS_NAME_BULKTAG_ZENODO));
|
CLASS_NAME_BULKTAG_ZENODO,
|
||||||
|
TAGGING_TRUST));
|
||||||
}
|
}
|
||||||
return c;
|
return c;
|
||||||
})
|
})
|
||||||
|
@ -210,21 +212,24 @@ public class ResultTagger implements Serializable {
|
||||||
getDataInfo(
|
getDataInfo(
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
BULKTAG_DATA_INFO_TYPE,
|
||||||
CLASS_ID_SUBJECT,
|
CLASS_ID_SUBJECT,
|
||||||
CLASS_NAME_BULKTAG_SUBJECT));
|
CLASS_NAME_BULKTAG_SUBJECT,
|
||||||
|
TAGGING_TRUST));
|
||||||
if (datasources.contains(c))
|
if (datasources.contains(c))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
getDataInfo(
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
BULKTAG_DATA_INFO_TYPE,
|
||||||
CLASS_ID_DATASOURCE,
|
CLASS_ID_DATASOURCE,
|
||||||
CLASS_NAME_BULKTAG_DATASOURCE));
|
CLASS_NAME_BULKTAG_DATASOURCE,
|
||||||
|
TAGGING_TRUST));
|
||||||
if (czenodo.contains(c))
|
if (czenodo.contains(c))
|
||||||
dataInfoList
|
dataInfoList
|
||||||
.add(
|
.add(
|
||||||
getDataInfo(
|
getDataInfo(
|
||||||
BULKTAG_DATA_INFO_TYPE,
|
BULKTAG_DATA_INFO_TYPE,
|
||||||
CLASS_ID_CZENODO,
|
CLASS_ID_CZENODO,
|
||||||
CLASS_NAME_BULKTAG_ZENODO));
|
CLASS_NAME_BULKTAG_ZENODO,
|
||||||
|
TAGGING_TRUST));
|
||||||
context.setDataInfo(dataInfoList);
|
context.setDataInfo(dataInfoList);
|
||||||
return context;
|
return context;
|
||||||
})
|
})
|
||||||
|
@ -235,11 +240,12 @@ public class ResultTagger implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static DataInfo getDataInfo(
|
public static DataInfo getDataInfo(
|
||||||
String inference_provenance, String inference_class_id, String inference_class_name) {
|
String inference_provenance, String inference_class_id, String inference_class_name, String trust) {
|
||||||
DataInfo di = new DataInfo();
|
DataInfo di = new DataInfo();
|
||||||
di.setInferred(true);
|
di.setInferred(true);
|
||||||
di.setInferenceprovenance(inference_provenance);
|
di.setInferenceprovenance(inference_provenance);
|
||||||
di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name));
|
di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name));
|
||||||
|
di.setTrust(trust);
|
||||||
return di;
|
return di;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,4 +14,6 @@ public class TaggingConstants {
|
||||||
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
|
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
|
||||||
public static final String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
|
public static final String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
|
||||||
public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
|
public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
|
||||||
|
|
||||||
|
public static final String TAGGING_TRUST = "0.8";
|
||||||
}
|
}
|
||||||
|
|
|
@ -106,12 +106,6 @@
|
||||||
<subject>aqua</subject>
|
<subject>aqua</subject>
|
||||||
<subject>sea</subject>
|
<subject>sea</subject>
|
||||||
</subjects>
|
</subjects>
|
||||||
<providers>
|
|
||||||
<datasource>
|
|
||||||
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
|
|
||||||
<selcriteria/>
|
|
||||||
</datasource>
|
|
||||||
</providers>
|
|
||||||
<zenodocommunities/>
|
<zenodocommunities/>
|
||||||
</community>
|
</community>
|
||||||
<community id="aginfra">
|
<community id="aginfra">
|
||||||
|
|
|
@ -42,6 +42,12 @@
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
|
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.commons</groupId>
|
||||||
|
<artifactId>commons-compress</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-io</groupId>
|
<groupId>commons-io</groupId>
|
||||||
<artifactId>commons-io</artifactId>
|
<artifactId>commons-io</artifactId>
|
||||||
|
@ -59,6 +65,12 @@
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-hive_2.11</artifactId>
|
<artifactId>spark-hive_2.11</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -92,14 +104,21 @@
|
||||||
<groupId>org.postgresql</groupId>
|
<groupId>org.postgresql</groupId>
|
||||||
<artifactId>postgresql</artifactId>
|
<artifactId>postgresql</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpmime</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.github.victools</groupId>
|
||||||
|
<artifactId>jsonschema-generator</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.json4s</groupId>
|
<groupId>org.json4s</groupId>
|
||||||
<artifactId>json4s-jackson_2.11</artifactId>
|
<artifactId>json4s-jackson_2.11</artifactId>
|
||||||
<version>3.5.3</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
public class Constants {
|
||||||
|
|
||||||
|
public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
|
||||||
|
public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
|
||||||
|
|
||||||
|
public static final String INFERRED = "Inferred by OpenAIRE";
|
||||||
|
|
||||||
|
public static final String HARVESTED = "Harvested";
|
||||||
|
public static final String DEFAULT_TRUST = "0.9";
|
||||||
|
public static final String USER_CLAIM = "Linked by user";;
|
||||||
|
|
||||||
|
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
||||||
|
|
||||||
|
public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";
|
||||||
|
|
||||||
|
public static String RESEARCH_COMMUNITY = "Research Community";
|
||||||
|
|
||||||
|
public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
|
||||||
|
|
||||||
|
public static String ORCID = "orcid";
|
||||||
|
|
||||||
|
static {
|
||||||
|
accessRightsCoarMap.put("OPEN", "c_abf2");
|
||||||
|
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
|
||||||
|
accessRightsCoarMap.put("OPEN SOURCE", "c_abf2");
|
||||||
|
accessRightsCoarMap.put("CLOSED", "c_14cb");
|
||||||
|
accessRightsCoarMap.put("EMBARGO", "c_f1cf");
|
||||||
|
}
|
||||||
|
|
||||||
|
static {
|
||||||
|
coarCodeLabelMap.put("c_abf2", "OPEN");
|
||||||
|
coarCodeLabelMap.put("c_16ec", "RESTRICTED");
|
||||||
|
coarCodeLabelMap.put("c_14cb", "CLOSED");
|
||||||
|
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,106 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* It fires the execution of the actual dump for result entities. If the dump is for RC/RI products its checks for each
|
||||||
|
* result its belongingess to at least one RC/RI before "asking" for its mapping.
|
||||||
|
*/
|
||||||
|
public class DumpProducts implements Serializable {
|
||||||
|
|
||||||
|
public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
|
||||||
|
Class<? extends OafEntity> inputClazz,
|
||||||
|
Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result> outputClazz,
|
||||||
|
boolean graph) {
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
execDump(spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, graph);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> void execDump(
|
||||||
|
SparkSession spark,
|
||||||
|
String inputPath,
|
||||||
|
String outputPath,
|
||||||
|
String communityMapPath,
|
||||||
|
Class<I> inputClazz,
|
||||||
|
Class<O> outputClazz,
|
||||||
|
boolean graph) {
|
||||||
|
|
||||||
|
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.map(value -> execMap(value, communityMap, graph), Encoders.bean(outputClazz))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O execMap(I value,
|
||||||
|
CommunityMap communityMap,
|
||||||
|
boolean graph) {
|
||||||
|
|
||||||
|
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
|
||||||
|
if (odInfo.isPresent()) {
|
||||||
|
if (odInfo.get().getDeletedbyinference()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!graph) {
|
||||||
|
Set<String> communities = communityMap.keySet();
|
||||||
|
|
||||||
|
Optional<List<Context>> inputContext = Optional
|
||||||
|
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
|
||||||
|
if (!inputContext.isPresent()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
List<String> toDumpFor = inputContext.get().stream().map(c -> {
|
||||||
|
if (communities.contains(c.getId())) {
|
||||||
|
return c.getId();
|
||||||
|
}
|
||||||
|
if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
|
||||||
|
return c.getId().substring(0, 3);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}).filter(Objects::nonNull).collect(Collectors.toList());
|
||||||
|
if (toDumpFor.size() == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (O) ResultMapper.map(value, communityMap, graph);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,114 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
import org.apache.commons.compress.archivers.ar.ArArchiveEntry;
|
||||||
|
import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
|
||||||
|
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||||
|
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
|
||||||
|
public class MakeTar implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(MakeTar.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
MakeTar.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("hdfsPath");
|
||||||
|
log.info("hdfsPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
|
log.info("nameNode: {}", hdfsNameNode);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("input path : {}", inputPath);
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
||||||
|
makeTArArchive(fileSystem, inputPath, outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath) throws IOException {
|
||||||
|
|
||||||
|
RemoteIterator<LocatedFileStatus> dir_iterator = fileSystem.listLocatedStatus(new Path(inputPath));
|
||||||
|
|
||||||
|
while (dir_iterator.hasNext()) {
|
||||||
|
LocatedFileStatus fileStatus = dir_iterator.next();
|
||||||
|
|
||||||
|
Path p = fileStatus.getPath();
|
||||||
|
String p_string = p.toString();
|
||||||
|
String entity = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||||
|
|
||||||
|
write(fileSystem, p_string, outputPath + "/" + entity + ".tar", entity);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
Path hdfsWritePath = new Path(outputPath);
|
||||||
|
FSDataOutputStream fsDataOutputStream = null;
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
|
||||||
|
}
|
||||||
|
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
|
TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
||||||
|
|
||||||
|
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||||
|
.listFiles(
|
||||||
|
new Path(inputPath), true);
|
||||||
|
|
||||||
|
while (fileStatusListIterator.hasNext()) {
|
||||||
|
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
||||||
|
|
||||||
|
Path p = fileStatus.getPath();
|
||||||
|
String p_string = p.toString();
|
||||||
|
if (!p_string.endsWith("_SUCCESS")) {
|
||||||
|
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||||
|
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name + ".json.gz");
|
||||||
|
entry.setSize(fileStatus.getLen());
|
||||||
|
ar.putArchiveEntry(entry);
|
||||||
|
|
||||||
|
InputStream is = fileSystem.open(fileStatus.getPath());
|
||||||
|
|
||||||
|
BufferedInputStream bis = new BufferedInputStream(is);
|
||||||
|
|
||||||
|
int count;
|
||||||
|
byte data[] = new byte[1024];
|
||||||
|
while ((count = bis.read(data, 0, data.length)) != -1) {
|
||||||
|
ar.write(data, 0, count);
|
||||||
|
}
|
||||||
|
bis.close();
|
||||||
|
ar.closeArchiveEntry();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
ar.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.Element;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
public class QueryInformationSystem {
|
||||||
|
|
||||||
|
private ISLookUpService isLookUp;
|
||||||
|
|
||||||
|
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
||||||
|
+
|
||||||
|
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
||||||
|
" and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') "
|
||||||
|
+
|
||||||
|
" return " +
|
||||||
|
"<community> " +
|
||||||
|
"{$x//CONFIGURATION/context/@id}" +
|
||||||
|
"{$x//CONFIGURATION/context/@label}" +
|
||||||
|
"</community>";
|
||||||
|
|
||||||
|
public CommunityMap getCommunityMap()
|
||||||
|
throws ISLookUpException, DocumentException {
|
||||||
|
return getMap(isLookUp.quickSearchProfile(XQUERY));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public ISLookUpService getIsLookUp() {
|
||||||
|
return isLookUp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setIsLookUp(ISLookUpService isLookUpService) {
|
||||||
|
this.isLookUp = isLookUpService;
|
||||||
|
}
|
||||||
|
|
||||||
|
private CommunityMap getMap(List<String> communityMap) throws DocumentException {
|
||||||
|
final CommunityMap map = new CommunityMap();
|
||||||
|
|
||||||
|
for (String xml : communityMap) {
|
||||||
|
final Document doc;
|
||||||
|
doc = new SAXReader().read(new StringReader(xml));
|
||||||
|
Element root = doc.getRootElement();
|
||||||
|
map.put(root.attribute("id").getValue(), root.attribute("label").getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,523 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.Context;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class ResultMapper implements Serializable {
|
||||||
|
|
||||||
|
public static <I extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
|
||||||
|
I in, Map<String, String> communityMap, boolean graph) {
|
||||||
|
|
||||||
|
Result out;
|
||||||
|
if (graph) {
|
||||||
|
out = new Result();
|
||||||
|
} else {
|
||||||
|
out = new CommunityResult();
|
||||||
|
}
|
||||||
|
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
|
||||||
|
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
|
||||||
|
if (ort.isPresent()) {
|
||||||
|
switch (ort.get().getClassid()) {
|
||||||
|
case "publication":
|
||||||
|
Optional<Journal> journal = Optional
|
||||||
|
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
|
||||||
|
if (journal.isPresent()) {
|
||||||
|
Journal j = journal.get();
|
||||||
|
Container c = new Container();
|
||||||
|
c.setConferencedate(j.getConferencedate());
|
||||||
|
c.setConferenceplace(j.getConferenceplace());
|
||||||
|
c.setEdition(j.getEdition());
|
||||||
|
c.setEp(j.getEp());
|
||||||
|
c.setIss(j.getIss());
|
||||||
|
c.setIssnLinking(j.getIssnLinking());
|
||||||
|
c.setIssnOnline(j.getIssnOnline());
|
||||||
|
c.setIssnPrinted(j.getIssnPrinted());
|
||||||
|
c.setName(j.getName());
|
||||||
|
c.setSp(j.getSp());
|
||||||
|
c.setVol(j.getVol());
|
||||||
|
out.setContainer(c);
|
||||||
|
out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case "dataset":
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
|
||||||
|
Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue()));
|
||||||
|
Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
|
||||||
|
|
||||||
|
out
|
||||||
|
.setGeolocation(
|
||||||
|
Optional
|
||||||
|
.ofNullable(id.getGeolocation())
|
||||||
|
.map(
|
||||||
|
igl -> igl
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.map(gli -> {
|
||||||
|
GeoLocation gl = new GeoLocation();
|
||||||
|
gl.setBox(gli.getBox());
|
||||||
|
gl.setPlace(gli.getPlace());
|
||||||
|
gl.setPoint(gli.getPoint());
|
||||||
|
return gl;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(null));
|
||||||
|
|
||||||
|
out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
|
||||||
|
break;
|
||||||
|
case "software":
|
||||||
|
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
|
||||||
|
Optional
|
||||||
|
.ofNullable(is.getCodeRepositoryUrl())
|
||||||
|
.ifPresent(value -> out.setCodeRepositoryUrl(value.getValue()));
|
||||||
|
Optional
|
||||||
|
.ofNullable(is.getDocumentationUrl())
|
||||||
|
.ifPresent(
|
||||||
|
value -> out
|
||||||
|
.setDocumentationUrl(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(v -> v.getValue())
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(is.getProgrammingLanguage())
|
||||||
|
.ifPresent(value -> out.setProgrammingLanguage(value.getClassid()));
|
||||||
|
|
||||||
|
out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
|
||||||
|
break;
|
||||||
|
case "other":
|
||||||
|
|
||||||
|
eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input;
|
||||||
|
out
|
||||||
|
.setContactgroup(
|
||||||
|
Optional
|
||||||
|
.ofNullable(ir.getContactgroup())
|
||||||
|
.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
|
||||||
|
.orElse(null));
|
||||||
|
|
||||||
|
out
|
||||||
|
.setContactperson(
|
||||||
|
Optional
|
||||||
|
.ofNullable(ir.getContactperson())
|
||||||
|
.map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
|
||||||
|
.orElse(null));
|
||||||
|
out
|
||||||
|
.setTool(
|
||||||
|
Optional
|
||||||
|
.ofNullable(ir.getTool())
|
||||||
|
.map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
|
||||||
|
.orElse(null));
|
||||||
|
|
||||||
|
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getAuthor())
|
||||||
|
.ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
// I do not map Access Right UNKNOWN or OTHER
|
||||||
|
|
||||||
|
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
|
||||||
|
if (oar.isPresent()) {
|
||||||
|
if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
|
||||||
|
String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
|
||||||
|
out
|
||||||
|
.setBestaccessright(
|
||||||
|
AccessRight
|
||||||
|
.newInstance(
|
||||||
|
code,
|
||||||
|
Constants.coarCodeLabelMap.get(code),
|
||||||
|
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<String> contributorList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getContributor())
|
||||||
|
.ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
|
||||||
|
out.setContributor(contributorList);
|
||||||
|
|
||||||
|
// List<Country> countryList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getCountry())
|
||||||
|
.ifPresent(
|
||||||
|
value -> out
|
||||||
|
.setCountry(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
c -> {
|
||||||
|
if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
Country country = new Country();
|
||||||
|
country.setCode(c.getClassid());
|
||||||
|
country.setLabel(c.getClassname());
|
||||||
|
Optional
|
||||||
|
.ofNullable(c.getDataInfo())
|
||||||
|
.ifPresent(
|
||||||
|
provenance -> country
|
||||||
|
.setProvenance(
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
provenance
|
||||||
|
.getProvenanceaction()
|
||||||
|
.getClassname(),
|
||||||
|
c.getDataInfo().getTrust())));
|
||||||
|
return country;
|
||||||
|
})
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
|
// out.setCountry(countryList);
|
||||||
|
|
||||||
|
final List<String> coverageList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getCoverage())
|
||||||
|
.ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
|
||||||
|
out.setCoverage(coverageList);
|
||||||
|
|
||||||
|
out.setDateofcollection(input.getDateofcollection());
|
||||||
|
|
||||||
|
final List<String> descriptionList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getDescription())
|
||||||
|
.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
|
||||||
|
out.setDescription(descriptionList);
|
||||||
|
Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
|
||||||
|
if (oStr.isPresent()) {
|
||||||
|
out.setEmbargoenddate(oStr.get().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<String> formatList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getFormat())
|
||||||
|
.ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
|
||||||
|
out.setFormat(formatList);
|
||||||
|
out.setId(input.getId());
|
||||||
|
out.setOriginalId(input.getOriginalId());
|
||||||
|
|
||||||
|
final List<Instance> instanceList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getInstance())
|
||||||
|
.ifPresent(
|
||||||
|
inst -> inst
|
||||||
|
.stream()
|
||||||
|
.forEach(i -> instanceList.add(getInstance(i, graph))));
|
||||||
|
out
|
||||||
|
.setInstance(instanceList);
|
||||||
|
|
||||||
|
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
|
||||||
|
if (oL.isPresent()) {
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
|
||||||
|
out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname()));
|
||||||
|
}
|
||||||
|
Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
|
||||||
|
if (oLong.isPresent()) {
|
||||||
|
out.setLastupdatetimestamp(oLong.get());
|
||||||
|
}
|
||||||
|
Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
|
||||||
|
if (otitle.isPresent()) {
|
||||||
|
List<StructuredProperty> iTitle = otitle
|
||||||
|
.get()
|
||||||
|
.stream()
|
||||||
|
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
if (iTitle.size() > 0) {
|
||||||
|
out.setMaintitle(iTitle.get(0).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
iTitle = otitle
|
||||||
|
.get()
|
||||||
|
.stream()
|
||||||
|
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
if (iTitle.size() > 0) {
|
||||||
|
out.setSubtitle(iTitle.get(0).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
List<ControlledField> pids = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getPid())
|
||||||
|
.ifPresent(
|
||||||
|
value -> value
|
||||||
|
.stream()
|
||||||
|
.forEach(
|
||||||
|
p -> pids
|
||||||
|
.add(
|
||||||
|
ControlledField
|
||||||
|
.newInstance(p.getQualifier().getClassid(), p.getValue()))));
|
||||||
|
out.setPid(pids);
|
||||||
|
oStr = Optional.ofNullable(input.getDateofacceptance());
|
||||||
|
if (oStr.isPresent()) {
|
||||||
|
out.setPublicationdate(oStr.get().getValue());
|
||||||
|
}
|
||||||
|
oStr = Optional.ofNullable(input.getPublisher());
|
||||||
|
if (oStr.isPresent()) {
|
||||||
|
out.setPublisher(oStr.get().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> sourceList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getSource())
|
||||||
|
.ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
|
||||||
|
// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
|
||||||
|
List<Subject> subjectList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(input.getSubject())
|
||||||
|
.ifPresent(
|
||||||
|
value -> value
|
||||||
|
.forEach(s -> subjectList.add(getSubject(s))));
|
||||||
|
|
||||||
|
out.setSubjects(subjectList);
|
||||||
|
|
||||||
|
out.setType(input.getResulttype().getClassid());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!graph) {
|
||||||
|
((CommunityResult) out)
|
||||||
|
.setCollectedfrom(
|
||||||
|
input
|
||||||
|
.getCollectedfrom()
|
||||||
|
.stream()
|
||||||
|
.map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
|
Set<String> communities = communityMap.keySet();
|
||||||
|
List<Context> contextList = Optional
|
||||||
|
.ofNullable(
|
||||||
|
input
|
||||||
|
.getContext())
|
||||||
|
.map(
|
||||||
|
value -> value
|
||||||
|
.stream()
|
||||||
|
.map(c -> {
|
||||||
|
String community_id = c.getId();
|
||||||
|
if (community_id.indexOf("::") > 0) {
|
||||||
|
community_id = community_id.substring(0, community_id.indexOf("::"));
|
||||||
|
}
|
||||||
|
if (communities.contains(community_id)) {
|
||||||
|
Context context = new Context();
|
||||||
|
context.setCode(community_id);
|
||||||
|
context.setLabel(communityMap.get(community_id));
|
||||||
|
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
|
||||||
|
if (dataInfo.isPresent()) {
|
||||||
|
List<Provenance> provenance = new ArrayList<>();
|
||||||
|
provenance
|
||||||
|
.addAll(
|
||||||
|
dataInfo
|
||||||
|
.get()
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
di -> Optional
|
||||||
|
.ofNullable(di.getProvenanceaction())
|
||||||
|
.map(
|
||||||
|
provenanceaction -> Provenance
|
||||||
|
.newInstance(
|
||||||
|
provenanceaction.getClassname(), di.getTrust()))
|
||||||
|
.orElse(null))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toSet()));
|
||||||
|
|
||||||
|
context.setProvenance(getUniqueProvenance(provenance));
|
||||||
|
}
|
||||||
|
return context;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
})
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(new ArrayList<>());
|
||||||
|
|
||||||
|
if (contextList.size() > 0) {
|
||||||
|
Set<Integer> hashValue = new HashSet<>();
|
||||||
|
List<Context> remainigContext = new ArrayList<>();
|
||||||
|
contextList.forEach(c -> {
|
||||||
|
if (!hashValue.contains(c.hashCode())) {
|
||||||
|
remainigContext.add(c);
|
||||||
|
hashValue.add(c.hashCode());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
((CommunityResult) out).setContext(remainigContext);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Instance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i, boolean graph) {
|
||||||
|
|
||||||
|
Instance instance = new Instance();
|
||||||
|
|
||||||
|
if(!graph){
|
||||||
|
instance
|
||||||
|
.setCollectedfrom(
|
||||||
|
KeyValue
|
||||||
|
.newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));
|
||||||
|
instance
|
||||||
|
.setHostedby(
|
||||||
|
KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> opAr = Optional
|
||||||
|
.ofNullable(i.getAccessright());
|
||||||
|
if (opAr.isPresent()) {
|
||||||
|
if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
|
||||||
|
String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
|
||||||
|
instance
|
||||||
|
.setAccessright(
|
||||||
|
AccessRight
|
||||||
|
.newInstance(
|
||||||
|
code,
|
||||||
|
Constants.coarCodeLabelMap.get(code),
|
||||||
|
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(i.getLicense())
|
||||||
|
.ifPresent(value -> instance.setLicense(value.getValue()));
|
||||||
|
Optional
|
||||||
|
.ofNullable(i.getDateofacceptance())
|
||||||
|
.ifPresent(value -> instance.setPublicationdate(value.getValue()));
|
||||||
|
Optional
|
||||||
|
.ofNullable(i.getRefereed())
|
||||||
|
.ifPresent(value -> instance.setRefereed(value.getClassname()));
|
||||||
|
// .ifPresent(value -> instance.setRefereed(value.getValue()));
|
||||||
|
Optional
|
||||||
|
.ofNullable(i.getInstancetype())
|
||||||
|
.ifPresent(value -> instance.setType(value.getClassname()));
|
||||||
|
Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));
|
||||||
|
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) {
|
||||||
|
Provenance iProv = new Provenance();
|
||||||
|
// iProv.setProvenance(Constants.INFERRED);
|
||||||
|
|
||||||
|
Provenance hProv = new Provenance();
|
||||||
|
// hProv.setProvenance(Constants.HARVESTED);
|
||||||
|
Provenance lProv = new Provenance();
|
||||||
|
|
||||||
|
for (Provenance p : provenance) {
|
||||||
|
switch (p.getProvenance()) {
|
||||||
|
case Constants.HARVESTED:
|
||||||
|
hProv = getHighestTrust(hProv, p);
|
||||||
|
break;
|
||||||
|
case Constants.INFERRED:
|
||||||
|
iProv = getHighestTrust(iProv, p);
|
||||||
|
// To be removed as soon as the new beta run has been done
|
||||||
|
// this fixex issue of not set trust during bulktagging
|
||||||
|
if (StringUtils.isEmpty(iProv.getTrust())) {
|
||||||
|
iProv.setTrust(Constants.DEFAULT_TRUST);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Constants.USER_CLAIM:
|
||||||
|
lProv = getHighestTrust(lProv, p);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return Arrays
|
||||||
|
.asList(iProv, hProv, lProv)
|
||||||
|
.stream()
|
||||||
|
.filter(p -> !StringUtils.isEmpty(p.getProvenance()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Provenance getHighestTrust(Provenance hProv, Provenance p) {
|
||||||
|
if (StringUtils.isNoneEmpty(hProv.getTrust(), p.getTrust()))
|
||||||
|
return hProv.getTrust().compareTo(p.getTrust()) > 0 ? hProv : p;
|
||||||
|
|
||||||
|
return (StringUtils.isEmpty(p.getTrust()) && !StringUtils.isEmpty(hProv.getTrust())) ? hProv : p;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Subject getSubject(StructuredProperty s) {
|
||||||
|
Subject subject = new Subject();
|
||||||
|
subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
|
||||||
|
Optional<DataInfo> di = Optional.ofNullable(s.getDataInfo());
|
||||||
|
if (di.isPresent()) {
|
||||||
|
Provenance p = new Provenance();
|
||||||
|
p.setProvenance(di.get().getProvenanceaction().getClassname());
|
||||||
|
p.setTrust(di.get().getTrust());
|
||||||
|
subject.setProvenance(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
return subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
|
||||||
|
Author a = new Author();
|
||||||
|
a.setFullname(oa.getFullname());
|
||||||
|
a.setName(oa.getName());
|
||||||
|
a.setSurname(oa.getSurname());
|
||||||
|
a.setRank(oa.getRank());
|
||||||
|
|
||||||
|
Optional<List<StructuredProperty>> oPids = Optional
|
||||||
|
.ofNullable(oa.getPid());
|
||||||
|
if (oPids.isPresent()) {
|
||||||
|
Pid pid = getOrcid(oPids.get());
|
||||||
|
if (pid != null) {
|
||||||
|
a.setPid(pid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Pid getOrcid(List<StructuredProperty> p) {
|
||||||
|
for (StructuredProperty pid : p) {
|
||||||
|
if (pid.getQualifier().getClassid().equals(Constants.ORCID)) {
|
||||||
|
Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo());
|
||||||
|
if (di.isPresent()) {
|
||||||
|
return Pid
|
||||||
|
.newInstance(
|
||||||
|
ControlledField
|
||||||
|
.newInstance(
|
||||||
|
pid.getQualifier().getClassid(),
|
||||||
|
pid.getValue()),
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
di.get().getProvenanceaction().getClassname(),
|
||||||
|
di.get().getTrust()));
|
||||||
|
} else {
|
||||||
|
return Pid
|
||||||
|
.newInstance(
|
||||||
|
ControlledField
|
||||||
|
.newInstance(
|
||||||
|
pid.getQualifier().getClassid(),
|
||||||
|
pid.getValue())
|
||||||
|
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,84 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
|
||||||
|
* context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community
|
||||||
|
* - research infrastructure/initiative , the value is the label of the research community - research
|
||||||
|
* infrastructure/initiative.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class SaveCommunityMap implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
|
||||||
|
private final QueryInformationSystem queryInformationSystem;
|
||||||
|
|
||||||
|
private final Configuration conf;
|
||||||
|
private final BufferedWriter writer;
|
||||||
|
|
||||||
|
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
|
||||||
|
conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
|
FSDataOutputStream fsDataOutputStream = null;
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fileSystem.delete(hdfsWritePath);
|
||||||
|
}
|
||||||
|
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
|
queryInformationSystem = new QueryInformationSystem();
|
||||||
|
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
||||||
|
|
||||||
|
writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SaveCommunityMap.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String nameNode = parser.get("nameNode");
|
||||||
|
log.info("nameNode: {}", nameNode);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String isLookUpUrl = parser.get("isLookUpUrl");
|
||||||
|
log.info("isLookUpUrl: {}", isLookUpUrl);
|
||||||
|
|
||||||
|
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
|
||||||
|
|
||||||
|
scm.saveCommunityMap();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException {
|
||||||
|
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap()));
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.*;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
|
||||||
|
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
|
||||||
|
public class SendToZenodoHDFS implements Serializable {
|
||||||
|
|
||||||
|
private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class);
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws Exception, MissingConceptDoiException {
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
SendToZenodoHDFS.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json")));
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String hdfsPath = parser.get("hdfsPath");
|
||||||
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
|
final String access_token = parser.get("accessToken");
|
||||||
|
final String connection_url = parser.get("connectionUrl");
|
||||||
|
final String metadata = parser.get("metadata");
|
||||||
|
final Boolean newDeposition = Boolean.valueOf(parser.get("newDeposition"));
|
||||||
|
final String concept_rec_id = Optional
|
||||||
|
.ofNullable(parser.get("conceptRecordId"))
|
||||||
|
.orElse(null);
|
||||||
|
final String communityMapPath = parser.get("communityMapPath");
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
||||||
|
CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath);
|
||||||
|
|
||||||
|
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||||
|
.listFiles(
|
||||||
|
new Path(hdfsPath), true);
|
||||||
|
ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connection_url, access_token);
|
||||||
|
if (newDeposition) {
|
||||||
|
zenodoApiClient.newDeposition();
|
||||||
|
} else {
|
||||||
|
if (concept_rec_id == null) {
|
||||||
|
throw new MissingConceptDoiException("No concept record id has been provided");
|
||||||
|
}
|
||||||
|
zenodoApiClient.newVersion(concept_rec_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (fileStatusListIterator.hasNext()) {
|
||||||
|
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
||||||
|
|
||||||
|
Path p = fileStatus.getPath();
|
||||||
|
String p_string = p.toString();
|
||||||
|
if (!p_string.endsWith("_SUCCESS")) {
|
||||||
|
// String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
|
||||||
|
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||||
|
log.info("Sending information for community: " + name);
|
||||||
|
if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) {
|
||||||
|
name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar";
|
||||||
|
}
|
||||||
|
|
||||||
|
FSDataInputStream inputStream = fileSystem.open(p);
|
||||||
|
zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
zenodoApiClient.sendMretadata(metadata);
|
||||||
|
zenodoApiClient.publish();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,73 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.graph.Constants;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
public class Utils {
|
||||||
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
public static void removeOutputDir(SparkSession spark, String path) {
|
||||||
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <R> Dataset<R> readPath(
|
||||||
|
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||||
|
return spark
|
||||||
|
.read()
|
||||||
|
.textFile(inputPath)
|
||||||
|
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ISLookUpService getIsLookUpService(String isLookUpUrl) {
|
||||||
|
return ISLookupClientFactory.getLookUpService(isLookUpUrl);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getContextId(String id) {
|
||||||
|
|
||||||
|
return String
|
||||||
|
.format(
|
||||||
|
"%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
|
||||||
|
DHPUtils.md5(id));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CommunityMap getCommunityMap(SparkSession spark, String communityMapPath) {
|
||||||
|
|
||||||
|
return new Gson().fromJson(spark.read().textFile(communityMapPath).collectAsList().get(0), CommunityMap.class);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CommunityMap readCommunityMap(FileSystem fileSystem, String communityMapPath) throws IOException {
|
||||||
|
BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(communityMapPath))));
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
try {
|
||||||
|
String line;
|
||||||
|
while ((line = br.readLine()) != null) {
|
||||||
|
sb.append(line);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
br.close();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Gson().fromJson(sb.toString(), CommunityMap.class);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,8 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
/**
 * String-to-String map describing the communities involved in the dump.
 * It adds nothing to {@link HashMap}: it only gives a name to the type.
 */
public class CommunityMap extends HashMap<String, String> implements Serializable {

}
|
|
@ -0,0 +1,83 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class splits the dumped results according to the research community - research initiative/infrastructure they
|
||||||
|
* are related to. The information about the community is found in the element "context.id" in the result. Since the
|
||||||
|
* context that can be found in the result can be associated not only to communities, a community Map is provided. It
|
||||||
|
* will guide the splitting process. Note: the repartition(1) just before writing the results related to a community.
|
||||||
|
* This is a choice due to uploading constraints (just one file for each community) As soon as a better solution will be
|
||||||
|
* in place remove the repartition
|
||||||
|
*/
|
||||||
|
public class CommunitySplit implements Serializable {
|
||||||
|
|
||||||
|
public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath) {
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
execSplit(spark, inputPath, outputPath, Utils.getCommunityMap(spark, communityMapPath).keySet());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void execSplit(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
Set<String> communities) {
|
||||||
|
|
||||||
|
Dataset<CommunityResult> result = Utils
|
||||||
|
.readPath(spark, inputPath + "/publication", CommunityResult.class)
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class))
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
|
||||||
|
|
||||||
|
communities
|
||||||
|
.stream()
|
||||||
|
.forEach(c -> printResult(c, result, outputPath));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
|
||||||
|
Dataset<CommunityResult> community_products = result
|
||||||
|
.filter(r -> containsCommunity(r, c));
|
||||||
|
|
||||||
|
try {
|
||||||
|
community_products.first();
|
||||||
|
community_products
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath + "/" + c);
|
||||||
|
} catch (Exception e) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean containsCommunity(CommunityResult r, String c) {
|
||||||
|
if (Optional.ofNullable(r.getContext()).isPresent()) {
|
||||||
|
return r
|
||||||
|
.getContext()
|
||||||
|
.stream()
|
||||||
|
.filter(con -> con.getCode().equals(c))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.size() > 0;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||||
|
|
||||||
|
public class ResultProject implements Serializable {
|
||||||
|
private String resultId;
|
||||||
|
private List<Project> projectsList;
|
||||||
|
|
||||||
|
public String getResultId() {
|
||||||
|
return resultId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setResultId(String resultId) {
|
||||||
|
this.resultId = resultId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Project> getProjectsList() {
|
||||||
|
return projectsList;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProjectsList(List<Project> projectsList) {
|
||||||
|
this.projectsList = projectsList;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,62 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spark action to trigger the dump of results associated to research community - reseach initiative/infrasctructure The
|
||||||
|
* actual dump if performed via the class DumpProducts that is used also for the entire graph dump
|
||||||
|
*/
|
||||||
|
public class SparkDumpCommunityProducts implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkDumpCommunityProducts.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String resultClassName = parser.get("resultTableName");
|
||||||
|
log.info("resultTableName: {}", resultClassName);
|
||||||
|
|
||||||
|
String communityMapPath = parser.get("communityMapPath");
|
||||||
|
|
||||||
|
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class,
|
||||||
|
false);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,185 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.Node;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Preparation of the Project information to be added to the dumped results. For each result associated to at least one
|
||||||
|
* Project, a serialization of an instance of the ResultProject class is done. ResultProject contains the resultId, and the
|
||||||
|
* list of Projects (as in eu.dnetlib.dhp.schema.dump.oaf.community.Project) it is associated to
|
||||||
|
*/
|
||||||
|
public class SparkPrepareResultProject implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkPrepareResultProject.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkPrepareResultProject.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
prepareResultProjectList(spark, inputPath, outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) {
|
||||||
|
Dataset<Relation> relation = Utils
|
||||||
|
.readPath(spark, inputPath + "/relation", Relation.class)
|
||||||
|
.filter("dataInfo.deletedbyinference = false and relClass = 'produces'");
|
||||||
|
Dataset<eu.dnetlib.dhp.schema.oaf.Project> projects = Utils
|
||||||
|
.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);
|
||||||
|
|
||||||
|
projects
|
||||||
|
.joinWith(relation, projects.col("id").equalTo(relation.col("source")))
|
||||||
|
.groupByKey(
|
||||||
|
(MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, String>) value -> value
|
||||||
|
._2()
|
||||||
|
.getTarget(),
|
||||||
|
Encoders.STRING())
|
||||||
|
.mapGroups(
|
||||||
|
(MapGroupsFunction<String, Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, ResultProject>) (s,
|
||||||
|
it) -> {
|
||||||
|
Set<String> projectSet = new HashSet<>();
|
||||||
|
Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation> first = it.next();
|
||||||
|
ResultProject rp = new ResultProject();
|
||||||
|
rp.setResultId(first._2().getTarget());
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Project p = first._1();
|
||||||
|
projectSet.add(p.getId());
|
||||||
|
Project ps = getProject(p);
|
||||||
|
|
||||||
|
List<Project> projList = new ArrayList<>();
|
||||||
|
projList.add(ps);
|
||||||
|
rp.setProjectsList(projList);
|
||||||
|
it.forEachRemaining(c -> {
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Project op = c._1();
|
||||||
|
if (!projectSet.contains(op.getId())) {
|
||||||
|
projList
|
||||||
|
.add(getProject(op));
|
||||||
|
|
||||||
|
projectSet.add(op.getId());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
return rp;
|
||||||
|
}, Encoders.bean(ResultProject.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op) {
|
||||||
|
Project p = Project
|
||||||
|
.newInstance(
|
||||||
|
op.getId(),
|
||||||
|
op.getCode().getValue(),
|
||||||
|
Optional
|
||||||
|
.ofNullable(op.getAcronym())
|
||||||
|
.map(a -> a.getValue())
|
||||||
|
.orElse(null),
|
||||||
|
Optional
|
||||||
|
.ofNullable(op.getTitle())
|
||||||
|
.map(v -> v.getValue())
|
||||||
|
.orElse(null),
|
||||||
|
Optional
|
||||||
|
.ofNullable(op.getFundingtree())
|
||||||
|
.map(
|
||||||
|
value -> value
|
||||||
|
.stream()
|
||||||
|
.map(ft -> getFunder(ft.getValue()))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(0))
|
||||||
|
.orElse(null));
|
||||||
|
|
||||||
|
Optional<DataInfo> di = Optional.ofNullable(op.getDataInfo());
|
||||||
|
Provenance provenance = new Provenance();
|
||||||
|
if (di.isPresent()) {
|
||||||
|
provenance.setProvenance(di.get().getProvenanceaction().getClassname());
|
||||||
|
provenance.setTrust(di.get().getTrust());
|
||||||
|
p.setProvenance(provenance);
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Funder getFunder(String fundingtree) {
|
||||||
|
// ["<fundingtree><funder><id>nsf_________::NSF</id><shortname>NSF</shortname><name>National Science
|
||||||
|
// Foundation</name><jurisdiction>US</jurisdiction></funder><funding_level_1><id>nsf_________::NSF::CISE/OAD::CISE/CCF</id><description>Division
|
||||||
|
// of Computing and Communication Foundations</description><name>Division of Computing and Communication
|
||||||
|
// Foundations</name><parent><funding_level_0><id>nsf_________::NSF::CISE/OAD</id><description>Directorate for
|
||||||
|
// Computer & Information Science & Engineering</description><name>Directorate for Computer &
|
||||||
|
// Information Science &
|
||||||
|
// Engineering</name><parent/><class>nsf:fundingStream</class></funding_level_0></parent></funding_level_1></fundingtree>"]
|
||||||
|
Funder f = new Funder();
|
||||||
|
final Document doc;
|
||||||
|
try {
|
||||||
|
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||||
|
f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
||||||
|
f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
|
||||||
|
f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||||
|
for (Object o : doc.selectNodes("//funding_level_0")) {
|
||||||
|
List node = ((Node) o).selectNodes("./name");
|
||||||
|
f.setFundingStream(((Node) node.get(0)).getText());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return f;
|
||||||
|
} catch (DocumentException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,50 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spark job to trigger the split of results associated to research community - research initiative/infrastructure. The
|
||||||
|
* actual split is performed by the class CommunitySplit
|
||||||
|
*/
|
||||||
|
public class SparkSplitForCommunity implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkSplitForCommunity.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkSplitForCommunity.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String communityMapPath = parser.get("communityMapPath");
|
||||||
|
|
||||||
|
CommunitySplit split = new CommunitySplit();
|
||||||
|
split.run(isSparkSessionManaged, inputPath, outputPath, communityMapPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,90 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||||
|
|
||||||
|
public class SparkUpdateProjectInfo implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkUpdateProjectInfo.class);
|
||||||
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkUpdateProjectInfo.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String preparedInfoPath = parser.get("preparedInfoPath");
|
||||||
|
log.info("preparedInfoPath: {}", preparedInfoPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
extend(spark, inputPath, outputPath, preparedInfoPath);// , inputClazz);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void extend(
|
||||||
|
SparkSession spark,
|
||||||
|
String inputPath,
|
||||||
|
String outputPath,
|
||||||
|
String preparedInfoPath) {
|
||||||
|
Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
|
||||||
|
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
|
||||||
|
result
|
||||||
|
.joinWith(
|
||||||
|
resultProject, result.col("id").equalTo(resultProject.col("resultId")),
|
||||||
|
"left")
|
||||||
|
.map(value -> {
|
||||||
|
CommunityResult r = value._1();
|
||||||
|
Optional.ofNullable(value._2()).ifPresent(rp -> {
|
||||||
|
r.setProjects(rp.getProjectsList());
|
||||||
|
});
|
||||||
|
return r;
|
||||||
|
}, Encoders.bean(CommunityResult.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.json(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class Constants implements Serializable {

	// --- relation names ---

	/** Name of the relation "isHostedBy". */
	public static final String IS_HOSTED_BY = "isHostedBy";
	/** Name of the relation "hosts". */
	public static final String HOSTS = "hosts";

	/** Name of the relation "isFundedBy". */
	public static final String IS_FUNDED_BY = "isFundedBy";
	/** Name of the relation "funds". */
	public static final String FUNDS = "funds";

	public static final String FUNDINGS = "fundings";

	// --- entity type names ---

	public static final String RESULT_ENTITY = "result";
	public static final String DATASOURCE_ENTITY = "datasource";
	public static final String CONTEXT_ENTITY = "context";
	public static final String ORGANIZATION_ENTITY = "organization";
	public static final String PROJECT_ENTITY = "project";

	// --- context identifier parts ---

	public static final String CONTEXT_ID = "00";
	public static final String CONTEXT_NS_PREFIX = "context_____";

	// currently disabled:
	// public static final String FUNDER_DS = "entityregistry::projects";
}
|
|
@ -0,0 +1,84 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deserialization of the information in the context needed to create Context Entities, and relations between context
|
||||||
|
* entities and datasources and projects
|
||||||
|
*/
|
||||||
|
public class ContextInfo implements Serializable {

	private String id;
	private String description;
	private String type;
	private String zenodocommunity;
	private String name;
	private List<String> projectList;
	private List<String> datasourceList;
	private List<String> subject;

	/** @return the identifier of the context */
	public String getId() {
		return this.id;
	}

	/** @param id the identifier of the context */
	public void setId(String id) {
		this.id = id;
	}

	/** @return the description of the context */
	public String getDescription() {
		return this.description;
	}

	/** @param description the description of the context */
	public void setDescription(String description) {
		this.description = description;
	}

	/** @return the type of the context */
	public String getType() {
		return this.type;
	}

	/** @param type the type of the context */
	public void setType(String type) {
		this.type = type;
	}

	/** @return the zenodo community associated to the context */
	public String getZenodocommunity() {
		return this.zenodocommunity;
	}

	/** @param zenodocommunity the zenodo community associated to the context */
	public void setZenodocommunity(String zenodocommunity) {
		this.zenodocommunity = zenodocommunity;
	}

	/** @return the name of the context */
	public String getName() {
		return this.name;
	}

	/** @param name the name of the context */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the list of projects related to the context */
	public List<String> getProjectList() {
		return this.projectList;
	}

	/** @param projectList the list of projects related to the context */
	public void setProjectList(List<String> projectList) {
		this.projectList = projectList;
	}

	/** @return the list of datasources related to the context */
	public List<String> getDatasourceList() {
		return this.datasourceList;
	}

	/** @param datasourceList the list of datasources related to the context */
	public void setDatasourceList(List<String> datasourceList) {
		this.datasourceList = datasourceList;
	}

	/** @return the subjects associated to the context */
	public List<String> getSubject() {
		return this.subject;
	}

	/** @param subject the subjects associated to the context */
	public void setSubject(List<String> subject) {
		this.subject = subject;
	}
}
|
|
@ -0,0 +1,105 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
|
||||||
|
* collects the general information for contexts of type community or ri. The general information is the id of the
|
||||||
|
* context, its label, the subjects associated to the context, its zenodo community, description and type. This
|
||||||
|
* information is used to create a new Context Entity
|
||||||
|
*/
|
||||||
|
public class CreateContextEntities implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class);
|
||||||
|
private final Configuration conf;
|
||||||
|
private final BufferedWriter writer;
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
CreateContextEntities.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String hdfsPath = parser.get("hdfsPath");
|
||||||
|
log.info("hdfsPath: {}", hdfsPath);
|
||||||
|
|
||||||
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
|
log.info("nameNode: {}", hdfsNameNode);
|
||||||
|
|
||||||
|
final String isLookUpUrl = parser.get("isLookUpUrl");
|
||||||
|
log.info("isLookUpUrl: {}", isLookUpUrl);
|
||||||
|
|
||||||
|
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
|
||||||
|
|
||||||
|
log.info("Processing contexts...");
|
||||||
|
cce.execute(Process::getEntity, isLookUpUrl);
|
||||||
|
|
||||||
|
cce.close();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void close() throws IOException {
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public CreateContextEntities(String hdfsPath, String hdfsNameNode) throws IOException {
|
||||||
|
this.conf = new Configuration();
|
||||||
|
this.conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
FileSystem fileSystem = FileSystem.get(this.conf);
|
||||||
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
|
FSDataOutputStream fsDataOutputStream = null;
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fsDataOutputStream = fileSystem.append(hdfsWritePath);
|
||||||
|
} else {
|
||||||
|
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer, String isLookUpUrl)
|
||||||
|
throws Exception {
|
||||||
|
|
||||||
|
QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
|
||||||
|
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
||||||
|
|
||||||
|
final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));
|
||||||
|
|
||||||
|
queryInformationSystem.getContextInformation(consumer);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected <R extends ResearchInitiative> void writeEntity(final R r) {
|
||||||
|
try {
|
||||||
|
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
|
||||||
|
// log.info("writing context : {}", new Gson().toJson(r));
|
||||||
|
writer.newLine();
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,124 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
|
||||||
|
* and the project is not created because of a low coverage in the profiles of openaire ids related to projects
|
||||||
|
*/
|
||||||
|
public class CreateContextRelation implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class);
|
||||||
|
private final Configuration conf;
|
||||||
|
private final BufferedWriter writer;
|
||||||
|
private final QueryInformationSystem queryInformationSystem;
|
||||||
|
|
||||||
|
private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
|
||||||
|
private static final String CONTEX_RELATION_PROJECT = "projects";
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
CreateContextRelation.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String hdfsPath = parser.get("hdfsPath");
|
||||||
|
log.info("hdfsPath: {}", hdfsPath);
|
||||||
|
|
||||||
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
|
log.info("nameNode: {}", hdfsNameNode);
|
||||||
|
|
||||||
|
final String isLookUpUrl = parser.get("isLookUpUrl");
|
||||||
|
log.info("isLookUpUrl: {}", isLookUpUrl);
|
||||||
|
|
||||||
|
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);
|
||||||
|
|
||||||
|
log.info("Creating relation for datasource...");
|
||||||
|
cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class));
|
||||||
|
|
||||||
|
log.info("Creating relations for projects... ");
|
||||||
|
// cce
|
||||||
|
// .execute(
|
||||||
|
// Process::getRelation, CONTEX_RELATION_PROJECT,
|
||||||
|
// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
|
||||||
|
|
||||||
|
cce.close();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void close() throws IOException {
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
|
||||||
|
throws IOException, ISLookUpException {
|
||||||
|
this.conf = new Configuration();
|
||||||
|
this.conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
|
queryInformationSystem = new QueryInformationSystem();
|
||||||
|
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
||||||
|
queryInformationSystem.execContextRelationQuery();
|
||||||
|
|
||||||
|
FileSystem fileSystem = FileSystem.get(this.conf);
|
||||||
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
|
FSDataOutputStream fsDataOutputStream = null;
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fsDataOutputStream = fileSystem.append(hdfsWritePath);
|
||||||
|
} else {
|
||||||
|
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix) {
|
||||||
|
|
||||||
|
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(c -> writeEntity(c));
|
||||||
|
|
||||||
|
queryInformationSystem.getContextRelation(consumer, category, prefix);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void writeEntity(final Relation r) {
|
||||||
|
try {
|
||||||
|
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
|
||||||
|
writer.newLine();
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,496 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.Node;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dumps of entities in the model defined in eu.dnetlib.dhp.schema.dump.oaf.graph. Results are dumped using the same
|
||||||
|
* Mapper as for eu.dnetlib.dhp.schema.dump.oaf.community, while for the other entities the mapping is defined below
|
||||||
|
*/
|
||||||
|
public class DumpGraphEntities implements Serializable {
|
||||||
|
|
||||||
|
public void run(Boolean isSparkSessionManaged,
|
||||||
|
String inputPath,
|
||||||
|
String outputPath,
|
||||||
|
Class<? extends OafEntity> inputClazz,
|
||||||
|
String communityMapPath) {
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
switch (ModelSupport.idPrefixMap.get(inputClazz)) {
|
||||||
|
case "50":
|
||||||
|
DumpProducts d = new DumpProducts();
|
||||||
|
d
|
||||||
|
.run(
|
||||||
|
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, Result.class,
|
||||||
|
true);
|
||||||
|
break;
|
||||||
|
case "40":
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
projectMap(spark, inputPath, outputPath, inputClazz);
|
||||||
|
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
case "20":
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
organizationMap(spark, inputPath, outputPath, inputClazz);
|
||||||
|
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
case "10":
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
datasourceMap(spark, inputPath, outputPath, inputClazz);
|
||||||
|
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
Class<E> inputClazz) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
Class<E> inputClazz) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.map(p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p), Encoders.bean(Project.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
|
||||||
|
Datasource datasource = new Datasource();
|
||||||
|
|
||||||
|
datasource.setId(d.getId());
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOriginalId())
|
||||||
|
.ifPresent(
|
||||||
|
oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getPid())
|
||||||
|
.ifPresent(
|
||||||
|
pids -> pids
|
||||||
|
.stream()
|
||||||
|
.map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatasourcetype())
|
||||||
|
.ifPresent(
|
||||||
|
dsType -> datasource
|
||||||
|
.setDatasourcetype(ControlledField.newInstance(dsType.getClassid(), dsType.getClassname())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOpenairecompatibility())
|
||||||
|
.ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOfficialname())
|
||||||
|
.ifPresent(oname -> datasource.setOfficialname(oname.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getEnglishname())
|
||||||
|
.ifPresent(ename -> datasource.setEnglishname(ename.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getWebsiteurl())
|
||||||
|
.ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getLogourl())
|
||||||
|
.ifPresent(lurl -> datasource.setLogourl(lurl.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDateofvalidation())
|
||||||
|
.ifPresent(dval -> datasource.setDateofvalidation(dval.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDescription())
|
||||||
|
.ifPresent(dex -> datasource.setDescription(dex.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getSubjects())
|
||||||
|
.ifPresent(
|
||||||
|
sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOdpolicies())
|
||||||
|
.ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOdlanguages())
|
||||||
|
.ifPresent(
|
||||||
|
langs -> datasource
|
||||||
|
.setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getOdcontenttypes())
|
||||||
|
.ifPresent(
|
||||||
|
ctypes -> datasource
|
||||||
|
.setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getReleasestartdate())
|
||||||
|
.ifPresent(rd -> datasource.setReleasestartdate(rd.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getReleaseenddate())
|
||||||
|
.ifPresent(ed -> datasource.setReleaseenddate(ed.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getMissionstatementurl())
|
||||||
|
.ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatabaseaccesstype())
|
||||||
|
.ifPresent(ar -> datasource.setAccessrights(ar.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatauploadtype())
|
||||||
|
.ifPresent(dut -> datasource.setUploadrights(dut.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatabaseaccessrestriction())
|
||||||
|
.ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatauploadrestriction())
|
||||||
|
.ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getVersioning())
|
||||||
|
.ifPresent(v -> datasource.setVersioning(v.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getCitationguidelineurl())
|
||||||
|
.ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getPidsystems())
|
||||||
|
.ifPresent(ps -> datasource.setPidsystems(ps.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getCertificates())
|
||||||
|
.ifPresent(c -> datasource.setCertificates(c.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getPolicies())
|
||||||
|
.ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getJournal())
|
||||||
|
.ifPresent(j -> datasource.setJournal(getContainer(j)));
|
||||||
|
|
||||||
|
return datasource;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Container getContainer(Journal j) {
|
||||||
|
Container c = new Container();
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getName())
|
||||||
|
.ifPresent(n -> c.setName(n));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getIssnPrinted())
|
||||||
|
.ifPresent(issnp -> c.setIssnPrinted(issnp));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getIssnOnline())
|
||||||
|
.ifPresent(issno -> c.setIssnOnline(issno));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getIssnLinking())
|
||||||
|
.ifPresent(isnl -> c.setIssnLinking(isnl));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getEp())
|
||||||
|
.ifPresent(ep -> c.setEp(ep));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getIss())
|
||||||
|
.ifPresent(iss -> c.setIss(iss));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getSp())
|
||||||
|
.ifPresent(sp -> c.setSp(sp));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getVol())
|
||||||
|
.ifPresent(vol -> c.setVol(vol));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getEdition())
|
||||||
|
.ifPresent(edition -> c.setEdition(edition));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getConferencedate())
|
||||||
|
.ifPresent(cdate -> c.setConferencedate(cdate));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(j.getConferenceplace())
|
||||||
|
.ifPresent(cplace -> c.setConferenceplace(cplace));
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
|
||||||
|
Project project = new Project();
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getId())
|
||||||
|
.ifPresent(id -> project.setId(id));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getWebsiteurl())
|
||||||
|
.ifPresent(w -> project.setWebsiteurl(w.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getCode())
|
||||||
|
.ifPresent(code -> project.setCode(code.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getAcronym())
|
||||||
|
.ifPresent(acronynim -> project.setAcronym(acronynim.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getTitle())
|
||||||
|
.ifPresent(title -> project.setTitle(title.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getStartdate())
|
||||||
|
.ifPresent(sdate -> project.setStartdate(sdate.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getEnddate())
|
||||||
|
.ifPresent(edate -> project.setEnddate(edate.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getCallidentifier())
|
||||||
|
.ifPresent(cide -> project.setCallidentifier(cide.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getKeywords())
|
||||||
|
.ifPresent(key -> project.setKeywords(key.getValue()));
|
||||||
|
|
||||||
|
Optional<Field<String>> omandate = Optional.ofNullable(p.getOamandatepublications());
|
||||||
|
Optional<Field<String>> oecsc39 = Optional.ofNullable(p.getEcsc39());
|
||||||
|
boolean mandate = false;
|
||||||
|
if (omandate.isPresent()) {
|
||||||
|
if (omandate.get().getValue().equals("true")) {
|
||||||
|
mandate = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (oecsc39.isPresent()) {
|
||||||
|
if (oecsc39.get().getValue().equals("true")) {
|
||||||
|
mandate = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
project.setOpenaccessmandateforpublications(mandate);
|
||||||
|
project.setOpenaccessmandatefordataset(false);
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getEcarticle29_3())
|
||||||
|
.ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true")));
|
||||||
|
|
||||||
|
project
|
||||||
|
.setSubject(
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getSubjects())
|
||||||
|
.map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
|
||||||
|
.orElse(new ArrayList<>()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getSummary())
|
||||||
|
.ifPresent(summary -> project.setSummary(summary.getValue()));
|
||||||
|
|
||||||
|
Optional<Float> ofundedamount = Optional.ofNullable(p.getFundedamount());
|
||||||
|
Optional<Field<String>> ocurrency = Optional.ofNullable(p.getCurrency());
|
||||||
|
Optional<Float> ototalcost = Optional.ofNullable(p.getTotalcost());
|
||||||
|
|
||||||
|
if (ocurrency.isPresent()) {
|
||||||
|
if (ofundedamount.isPresent()) {
|
||||||
|
if (ototalcost.isPresent()) {
|
||||||
|
project
|
||||||
|
.setGranted(
|
||||||
|
Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get()));
|
||||||
|
} else {
|
||||||
|
project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
project
|
||||||
|
.setProgramme(
|
||||||
|
Optional
|
||||||
|
.ofNullable(p.getProgramme())
|
||||||
|
.map(
|
||||||
|
programme -> programme
|
||||||
|
.stream()
|
||||||
|
.map(pg -> Programme.newInstance(pg.getCode(), pg.getDescription()))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(new ArrayList<>()));
|
||||||
|
|
||||||
|
Optional<List<Field<String>>> ofundTree = Optional
|
||||||
|
.ofNullable(p.getFundingtree());
|
||||||
|
List<Funder> funList = new ArrayList<>();
|
||||||
|
if (ofundTree.isPresent()) {
|
||||||
|
for (Field<String> fundingtree : ofundTree.get()) {
|
||||||
|
funList.add(getFunder(fundingtree.getValue()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
project.setFunding(funList);
|
||||||
|
|
||||||
|
return project;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Funder getFunder(String fundingtree) throws DocumentException {
|
||||||
|
Funder f = new Funder();
|
||||||
|
final Document doc;
|
||||||
|
|
||||||
|
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||||
|
f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
||||||
|
f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
|
||||||
|
f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||||
|
// f.setId(((org.dom4j.Node) (doc.selectNodes("//funder/id").get(0))).getText());
|
||||||
|
|
||||||
|
String id = "";
|
||||||
|
String description = "";
|
||||||
|
// List<Levels> fundings = new ArrayList<>();
|
||||||
|
int level = 0;
|
||||||
|
List<org.dom4j.Node> nodes = doc.selectNodes("//funding_level_" + level);
|
||||||
|
while (nodes.size() > 0) {
|
||||||
|
for (org.dom4j.Node n : nodes) {
|
||||||
|
|
||||||
|
List node = n.selectNodes("./id");
|
||||||
|
id = ((org.dom4j.Node) node.get(0)).getText();
|
||||||
|
id = id.substring(id.indexOf("::") + 2);
|
||||||
|
|
||||||
|
node = n.selectNodes("./description");
|
||||||
|
description += ((Node) node.get(0)).getText() + " - ";
|
||||||
|
|
||||||
|
}
|
||||||
|
level += 1;
|
||||||
|
nodes = doc.selectNodes("//funding_level_" + level);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!id.equals("")) {
|
||||||
|
Fundings fundings = new Fundings();
|
||||||
|
fundings.setId(id);
|
||||||
|
fundings.setDescription(description.substring(0, description.length() - 3).trim());
|
||||||
|
f.setFunding_stream(fundings);
|
||||||
|
}
|
||||||
|
|
||||||
|
return f;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
Class<E> inputClazz) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.map(o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), Encoders.bean(Organization.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) {
|
||||||
|
Organization organization = new Organization();
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getLegalshortname())
|
||||||
|
.ifPresent(value -> organization.setLegalshortname(value.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getLegalname())
|
||||||
|
.ifPresent(value -> organization.setLegalname(value.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getWebsiteurl())
|
||||||
|
.ifPresent(value -> organization.setWebsiteurl(value.getValue()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getAlternativeNames())
|
||||||
|
.ifPresent(
|
||||||
|
value -> organization
|
||||||
|
.setAlternativenames(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(v -> v.getValue())
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getCountry())
|
||||||
|
.ifPresent(
|
||||||
|
value -> organization.setCountry(Qualifier.newInstance(value.getClassid(), value.getClassname())));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getId())
|
||||||
|
.ifPresent(value -> organization.setId(value));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(org.getPid())
|
||||||
|
.ifPresent(
|
||||||
|
value -> organization
|
||||||
|
.setPid(
|
||||||
|
value
|
||||||
|
.stream()
|
||||||
|
.map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||||
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
|
return organization;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,197 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity.
|
||||||
|
* The new Relations are created for the datasource in the collectedfrom and hostedby elements and for the context
|
||||||
|
* related to communities and research initiative/infrastructures.
|
||||||
|
*
|
||||||
|
* For collectedfrom elements it creates: datasource -> provides -> result and result -> isProvidedBy -> datasource
|
||||||
|
* For hostedby elements it creates: datasource -> hosts -> result and result -> isHostedBy -> datasource
|
||||||
|
* For context elements it creates: context <-> isRelatedTo <-> result
|
||||||
|
*/
|
||||||
|
public class Extractor implements Serializable {
|
||||||
|
|
||||||
|
public void run(Boolean isSparkSessionManaged,
|
||||||
|
String inputPath,
|
||||||
|
String outputPath,
|
||||||
|
Class<? extends Result> inputClazz,
|
||||||
|
String communityMapPath) {
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
extractRelationResult(
|
||||||
|
spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private <R extends Result> void extractRelationResult(SparkSession spark,
|
||||||
|
String inputPath,
|
||||||
|
String outputPath,
|
||||||
|
Class<R> inputClazz,
|
||||||
|
CommunityMap communityMap) {
|
||||||
|
|
||||||
|
Set<Integer> hashCodes = new HashSet<>();
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, inputClazz)
|
||||||
|
.flatMap((FlatMapFunction<R, Relation>) value -> {
|
||||||
|
List<Relation> relationList = new ArrayList<>();
|
||||||
|
Optional
|
||||||
|
.ofNullable(value.getInstance())
|
||||||
|
.ifPresent(inst -> inst.forEach(instance -> {
|
||||||
|
Optional
|
||||||
|
.ofNullable(instance.getCollectedfrom())
|
||||||
|
.ifPresent(
|
||||||
|
cf -> getRelatioPair(
|
||||||
|
value, relationList, cf,
|
||||||
|
ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
|
||||||
|
Optional
|
||||||
|
.ofNullable(instance.getHostedby())
|
||||||
|
.ifPresent(
|
||||||
|
hb -> getRelatioPair(
|
||||||
|
value, relationList, hb,
|
||||||
|
Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
|
||||||
|
}));
|
||||||
|
Set<String> communities = communityMap.keySet();
|
||||||
|
Optional
|
||||||
|
.ofNullable(value.getContext())
|
||||||
|
.ifPresent(contexts -> contexts.forEach(context -> {
|
||||||
|
String id = context.getId();
|
||||||
|
if (id.contains(":")) {
|
||||||
|
id = id.substring(0, id.indexOf(":"));
|
||||||
|
}
|
||||||
|
if (communities.contains(id)) {
|
||||||
|
String contextId = Utils.getContextId(id);
|
||||||
|
Provenance provenance = Optional
|
||||||
|
.ofNullable(context.getDataInfo())
|
||||||
|
.map(
|
||||||
|
dinfo -> Optional
|
||||||
|
.ofNullable(dinfo.get(0).getProvenanceaction())
|
||||||
|
.map(
|
||||||
|
paction -> Provenance
|
||||||
|
.newInstance(
|
||||||
|
paction.getClassid(),
|
||||||
|
dinfo.get(0).getTrust()))
|
||||||
|
.orElse(null))
|
||||||
|
.orElse(null);
|
||||||
|
Relation r = getRelation(
|
||||||
|
value.getId(), contextId,
|
||||||
|
Constants.RESULT_ENTITY,
|
||||||
|
Constants.CONTEXT_ENTITY,
|
||||||
|
ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, provenance);
|
||||||
|
if (!hashCodes.contains(r.hashCode())) {
|
||||||
|
relationList
|
||||||
|
.add(r);
|
||||||
|
hashCodes.add(r.hashCode());
|
||||||
|
}
|
||||||
|
r = getRelation(
|
||||||
|
contextId, value.getId(),
|
||||||
|
Constants.CONTEXT_ENTITY,
|
||||||
|
Constants.RESULT_ENTITY,
|
||||||
|
ModelConstants.RELATIONSHIP,
|
||||||
|
ModelConstants.IS_RELATED_TO, provenance);
|
||||||
|
if (!hashCodes.contains(r.hashCode())) {
|
||||||
|
relationList
|
||||||
|
.add(
|
||||||
|
r);
|
||||||
|
hashCodes.add(r.hashCode());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}));
|
||||||
|
|
||||||
|
return relationList.iterator();
|
||||||
|
}, Encoders.bean(Relation.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf,
|
||||||
|
String result_dtasource, String datasource_result,
|
||||||
|
Set<Integer> hashCodes) {
|
||||||
|
Provenance provenance = Optional
|
||||||
|
.ofNullable(cf.getDataInfo())
|
||||||
|
.map(
|
||||||
|
dinfo -> Optional
|
||||||
|
.ofNullable(dinfo.getProvenanceaction())
|
||||||
|
.map(
|
||||||
|
paction -> Provenance
|
||||||
|
.newInstance(
|
||||||
|
paction.getClassid(),
|
||||||
|
dinfo.getTrust()))
|
||||||
|
.orElse(
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
|
||||||
|
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)))
|
||||||
|
.orElse(
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
|
||||||
|
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST));
|
||||||
|
Relation r = getRelation(
|
||||||
|
value.getId(),
|
||||||
|
cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
|
||||||
|
result_dtasource, ModelConstants.PROVISION,
|
||||||
|
provenance);
|
||||||
|
if (!hashCodes.contains(r.hashCode())) {
|
||||||
|
relationList
|
||||||
|
.add(r);
|
||||||
|
hashCodes.add(r.hashCode());
|
||||||
|
}
|
||||||
|
|
||||||
|
r = getRelation(
|
||||||
|
cf.getKey(), value.getId(),
|
||||||
|
Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
|
||||||
|
datasource_result, ModelConstants.PROVISION,
|
||||||
|
provenance);
|
||||||
|
|
||||||
|
if (!hashCodes.contains(r.hashCode())) {
|
||||||
|
relationList
|
||||||
|
.add(r);
|
||||||
|
hashCodes.add(r.hashCode());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Relation getRelation(String source, String target, String sourceType, String targetType,
|
||||||
|
String relName, String relType, Provenance provenance) {
|
||||||
|
Relation r = new Relation();
|
||||||
|
r.setSource(Node.newInstance(source, sourceType));
|
||||||
|
r.setTarget(Node.newInstance(target, targetType));
|
||||||
|
r.setReltype(RelType.newInstance(relName, relType));
|
||||||
|
r.setProvenance(provenance);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Serializable bean holding a pair of identifiers: the identifier of an organization and
 * the identifier of its representative.
 */
public class MergedRels implements Serializable {

    /** Identifier of the organization. */
    private String organizationId;

    /** Identifier of the representative associated with the organization. */
    private String representativeId;

    /** @return the organization identifier, {@code null} if never set */
    public String getOrganizationId() {
        return this.organizationId;
    }

    /** @param organizationId the organization identifier to store */
    public void setOrganizationId(final String organizationId) {
        this.organizationId = organizationId;
    }

    /** @return the representative identifier, {@code null} if never set */
    public String getRepresentativeId() {
        return this.representativeId;
    }

    /** @param representativeId the representative identifier to store */
    public void setRepresentativeId(final String representativeId) {
        this.representativeId = representativeId;
    }
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
 * Map from a string key to a list of strings whose String-typed lookup never returns
 * {@code null}.
 */
public class OrganizationMap extends HashMap<String, List<String>> {

    public OrganizationMap() {
        super();
    }

    /**
     * Returns the list associated with {@code key}, or a new empty list when the key is
     * missing or mapped to {@code null}. The empty list is not stored in the map.
     * This is an overload of {@link HashMap#get(Object)}, not an override: it is selected
     * only when the argument is statically typed as {@code String}.
     *
     * @param key the key to look up
     * @return the associated list, never {@code null}
     */
    public List<String> get(String key) {
        final List<String> found = super.get(key);
        return found != null ? found : new ArrayList<>();
    }
}
|
|
@ -0,0 +1,98 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Processes the ContextInfo information to produce a new Context Entity or a set of Relations between the
|
||||||
|
* generic context entity and datasource/projects related to the context.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class Process implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(Process.class);
|
||||||
|
|
||||||
|
public static <R extends ResearchInitiative> R getEntity(ContextInfo ci) {
|
||||||
|
try {
|
||||||
|
ResearchInitiative ri;
|
||||||
|
if (ci.getType().equals("community")) {
|
||||||
|
ri = new ResearchCommunity();
|
||||||
|
((ResearchCommunity) ri).setSubject(ci.getSubject());
|
||||||
|
ri.setType(Constants.RESEARCH_COMMUNITY);
|
||||||
|
} else {
|
||||||
|
ri = new ResearchInitiative();
|
||||||
|
ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
|
||||||
|
}
|
||||||
|
ri.setId(Utils.getContextId(ci.getId()));
|
||||||
|
ri.setOriginalId(ci.getId());
|
||||||
|
|
||||||
|
ri.setDescription(ci.getDescription());
|
||||||
|
ri.setName(ci.getName());
|
||||||
|
ri.setZenodo_community(Constants.ZENODO_COMMUNITY_PREFIX + ci.getZenodocommunity());
|
||||||
|
return (R) ri;
|
||||||
|
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<Relation> getRelation(ContextInfo ci) {
|
||||||
|
try {
|
||||||
|
|
||||||
|
List<Relation> relationList = new ArrayList<>();
|
||||||
|
ci
|
||||||
|
.getDatasourceList()
|
||||||
|
.forEach(ds -> {
|
||||||
|
|
||||||
|
String nodeType = ModelSupport.idPrefixEntity.get(ds.substring(0, 2));
|
||||||
|
|
||||||
|
String contextId = Utils.getContextId(ci.getId());
|
||||||
|
relationList
|
||||||
|
.add(
|
||||||
|
Relation
|
||||||
|
.newInstance(
|
||||||
|
Node
|
||||||
|
.newInstance(
|
||||||
|
contextId, eu.dnetlib.dhp.schema.dump.oaf.graph.Constants.CONTEXT_ENTITY),
|
||||||
|
Node.newInstance(ds, nodeType),
|
||||||
|
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
Constants.USER_CLAIM,
|
||||||
|
Constants.DEFAULT_TRUST)));
|
||||||
|
|
||||||
|
relationList
|
||||||
|
.add(
|
||||||
|
Relation
|
||||||
|
.newInstance(
|
||||||
|
Node.newInstance(ds, nodeType),
|
||||||
|
Node
|
||||||
|
.newInstance(
|
||||||
|
contextId, eu.dnetlib.dhp.schema.dump.oaf.graph.Constants.CONTEXT_ENTITY),
|
||||||
|
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
Constants.USER_CLAIM,
|
||||||
|
Constants.DEFAULT_TRUST)));
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
return relationList;
|
||||||
|
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,132 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Statement;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.Element;
|
||||||
|
import org.dom4j.Node;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
public class QueryInformationSystem {
|
||||||
|
|
||||||
|
private ISLookUpService isLookUp;
|
||||||
|
private List<String> contextRelationResult;
|
||||||
|
|
||||||
|
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
||||||
|
+
|
||||||
|
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
||||||
|
" and $x//context/param[./@name = 'status']/text() = 'all' " +
|
||||||
|
" return " +
|
||||||
|
"$x//context";
|
||||||
|
|
||||||
|
private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
||||||
|
+
|
||||||
|
"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
|
||||||
|
+
|
||||||
|
"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
|
||||||
|
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
|
||||||
|
+
|
||||||
|
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
|
||||||
|
|
||||||
|
public void getContextInformation(final Consumer<ContextInfo> consumer) throws ISLookUpException {
|
||||||
|
|
||||||
|
isLookUp
|
||||||
|
.quickSearchProfile(XQUERY_ENTITY)
|
||||||
|
.forEach(c -> {
|
||||||
|
ContextInfo cinfo = new ContextInfo();
|
||||||
|
String[] cSplit = c.split("@@");
|
||||||
|
cinfo.setId(cSplit[0]);
|
||||||
|
cinfo.setName(cSplit[1]);
|
||||||
|
cinfo.setDescription(cSplit[2]);
|
||||||
|
if (!cSplit[3].trim().equals("")) {
|
||||||
|
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
|
||||||
|
}
|
||||||
|
cinfo.setZenodocommunity(cSplit[4]);
|
||||||
|
cinfo.setType(cSplit[5]);
|
||||||
|
consumer.accept(cinfo);
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getContextRelationResult() {
|
||||||
|
return contextRelationResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContextRelationResult(List<String> contextRelationResult) {
|
||||||
|
this.contextRelationResult = contextRelationResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ISLookUpService getIsLookUp() {
|
||||||
|
return isLookUp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setIsLookUp(ISLookUpService isLookUpService) {
|
||||||
|
this.isLookUp = isLookUpService;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void execContextRelationQuery() throws ISLookUpException {
|
||||||
|
contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix) {
|
||||||
|
|
||||||
|
contextRelationResult.forEach(xml -> {
|
||||||
|
ContextInfo cinfo = new ContextInfo();
|
||||||
|
final Document doc;
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
doc = new SAXReader().read(new StringReader(xml));
|
||||||
|
Element root = doc.getRootElement();
|
||||||
|
cinfo.setId(root.attributeValue("id"));
|
||||||
|
|
||||||
|
Iterator it = root.elementIterator();
|
||||||
|
while (it.hasNext()) {
|
||||||
|
Element el = (Element) it.next();
|
||||||
|
if (el.getName().equals("category")) {
|
||||||
|
String categoryId = el.attributeValue("id");
|
||||||
|
categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
|
||||||
|
if (categoryId.equals(category)) {
|
||||||
|
cinfo.setDatasourceList(getCategoryList(el, prefix));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
consumer.accept(cinfo);
|
||||||
|
} catch (DocumentException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
private List<String> getCategoryList(Element el, String prefix) {
|
||||||
|
List<String> datasourceList = new ArrayList<>();
|
||||||
|
for (Object node : el.selectNodes(".//param")) {
|
||||||
|
Node n = (Node) node;
|
||||||
|
if (n.valueOf("./@name").equals("openaireId")) {
|
||||||
|
datasourceList.add(prefix + "|" + n.getText());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return datasourceList;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,89 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads all the entities of the same type (Relation / Results) and saves them in the same folder
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class SparkCollectAndSave implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkCollectAndSave.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkCollectAndSave.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_collect_and_save.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath + "/result");
|
||||||
|
run(spark, inputPath, outputPath);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void run(SparkSession spark, String inputPath, String outputPath) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/result/publication", Result.class)
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/result/dataset", Result.class))
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", Result.class))
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/result/software", Result.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath + "/result");
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + "/relation/publication", Relation.class)
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
|
||||||
|
.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/relation");
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spark Job that fires the dump for the entites
|
||||||
|
*/
|
||||||
|
public class SparkDumpEntitiesJob implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkDumpEntitiesJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String resultClassName = parser.get("resultTableName");
|
||||||
|
log.info("resultTableName: {}", resultClassName);
|
||||||
|
|
||||||
|
final String communityMapPath = parser.get("communityMapPath");
|
||||||
|
|
||||||
|
Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);
|
||||||
|
|
||||||
|
DumpGraphEntities dg = new DumpGraphEntities();
|
||||||
|
dg.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,111 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dumps eu.dnetlib.dhp.schema.oaf.Relation in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
|
||||||
|
*/
|
||||||
|
public class SparkDumpRelationJob implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkDumpRelationJob.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkDumpRelationJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_relationdump_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
dumpRelation(spark, inputPath, outputPath);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) {
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath, Relation.class)
|
||||||
|
.map(relation -> {
|
||||||
|
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
|
||||||
|
rel
|
||||||
|
.setSource(
|
||||||
|
Node
|
||||||
|
.newInstance(
|
||||||
|
relation.getSource(),
|
||||||
|
ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));
|
||||||
|
|
||||||
|
rel
|
||||||
|
.setTarget(
|
||||||
|
Node
|
||||||
|
.newInstance(
|
||||||
|
relation.getTarget(),
|
||||||
|
ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));
|
||||||
|
|
||||||
|
rel
|
||||||
|
.setReltype(
|
||||||
|
RelType
|
||||||
|
.newInstance(
|
||||||
|
relation.getRelClass(),
|
||||||
|
relation.getSubRelType()));
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(relation.getDataInfo())
|
||||||
|
.ifPresent(
|
||||||
|
datainfo -> rel
|
||||||
|
.setProvenance(
|
||||||
|
Provenance
|
||||||
|
.newInstance(datainfo.getProvenanceaction().getClassname(), datainfo.getTrust())));
|
||||||
|
|
||||||
|
return rel;
|
||||||
|
|
||||||
|
}, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spark job that fires the extraction of relations from entities
|
||||||
|
*/
|
||||||
|
public class SparkExtractRelationFromEntities implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkExtractRelationFromEntities.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkExtractRelationFromEntities.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final String resultClassName = parser.get("resultTableName");
|
||||||
|
log.info("resultTableName: {}", resultClassName);
|
||||||
|
|
||||||
|
final String communityMapPath = parser.get("communityMapPath");
|
||||||
|
|
||||||
|
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||||
|
|
||||||
|
Extractor extractor = new Extractor();
|
||||||
|
extractor.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,161 @@
|
||||||
|
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.*;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create new Relations between Context Entities and Organizations whose products are associated to the context.
|
||||||
|
* It produces relation such as: organization <-> isRelatedTo <-> context
|
||||||
|
*/
|
||||||
|
public class SparkOrganizationRelation implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkOrganizationRelation.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
SparkOrganizationRelation.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_organization_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("sourcePath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
final OrganizationMap organizationMap = new Gson()
|
||||||
|
.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
|
||||||
|
log.info("organization map : {}", new Gson().toJson(organizationMap));
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
|
extractRelation(spark, inputPath, organizationMap, outputPath);
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
|
||||||
|
String outputPath) {
|
||||||
|
Dataset<Relation> relationDataset = Utils.readPath(spark, inputPath, Relation.class);
|
||||||
|
|
||||||
|
relationDataset.createOrReplaceTempView("relation");
|
||||||
|
|
||||||
|
List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList = new ArrayList<>();
|
||||||
|
|
||||||
|
Dataset<MergedRels> mergedRelsDataset = spark
|
||||||
|
.sql(
|
||||||
|
"SELECT target organizationId, source representativeId " +
|
||||||
|
"FROM relation " +
|
||||||
|
"WHERE datainfo.deletedbyinference = false " +
|
||||||
|
"AND relclass = 'merges' " +
|
||||||
|
"AND substr(source, 1, 2) = '20'")
|
||||||
|
.as(Encoders.bean(MergedRels.class));
|
||||||
|
|
||||||
|
mergedRelsDataset.map((MapFunction<MergedRels, MergedRels>) mergedRels -> {
|
||||||
|
if (organizationMap.containsKey(mergedRels.getOrganizationId())) {
|
||||||
|
return mergedRels;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}, Encoders.bean(MergedRels.class))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collectAsList()
|
||||||
|
.forEach(getMergedRelsConsumer(organizationMap, relList));
|
||||||
|
|
||||||
|
organizationMap
|
||||||
|
.keySet()
|
||||||
|
.forEach(
|
||||||
|
oId -> organizationMap
|
||||||
|
.get(oId)
|
||||||
|
.forEach(community -> addRelations(relList, community, oId)));
|
||||||
|
|
||||||
|
spark
|
||||||
|
.createDataset(relList, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
private static Consumer<MergedRels> getMergedRelsConsumer(OrganizationMap organizationMap,
|
||||||
|
List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList) {
|
||||||
|
return mergedRels -> {
|
||||||
|
String oId = mergedRels.getOrganizationId();
|
||||||
|
organizationMap
|
||||||
|
.get(oId)
|
||||||
|
.forEach(community -> addRelations(relList, community, mergedRels.getRepresentativeId()));
|
||||||
|
organizationMap.remove(oId);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addRelations(List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList, String community,
|
||||||
|
String organization) {
|
||||||
|
|
||||||
|
String id = Utils.getContextId(community);
|
||||||
|
log.info("create relation for organization: {}", organization);
|
||||||
|
relList
|
||||||
|
.add(
|
||||||
|
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
|
||||||
|
.newInstance(
|
||||||
|
Node.newInstance(id, Constants.CONTEXT_ENTITY),
|
||||||
|
Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
|
||||||
|
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
|
||||||
|
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));
|
||||||
|
|
||||||
|
relList
|
||||||
|
.add(
|
||||||
|
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
|
||||||
|
.newInstance(
|
||||||
|
Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
|
||||||
|
Node.newInstance(id, Constants.CONTEXT_ENTITY),
|
||||||
|
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||||
|
Provenance
|
||||||
|
.newInstance(
|
||||||
|
eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
|
||||||
|
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"is",
|
||||||
|
"paramLongName":"isLookUpUrl",
|
||||||
|
"paramDescription": "URL of the isLookUp Service",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"nn",
|
||||||
|
"paramLongName":"nameNode",
|
||||||
|
"paramDescription": "the name node",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequential file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "hdp",
|
||||||
|
"paramLongName": "hdfsPath",
|
||||||
|
"paramDescription": "the path used to store the output archive",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"nn",
|
||||||
|
"paramLongName":"nameNode",
|
||||||
|
"paramDescription": "the name node",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"cmp",
|
||||||
|
"paramLongName":"communityMapPath",
|
||||||
|
"paramDescription": "the path to the serialization of the community map",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequential file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"tn",
|
||||||
|
"paramLongName":"resultTableName",
|
||||||
|
"paramDescription": "the name of the result table we are currently working on",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<value>openaire</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,431 @@
|
||||||
|
<workflow-app name="dump_community_products" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>sourcePath</name>
|
||||||
|
<description>the source path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>isLookUpUrl</name>
|
||||||
|
<description>the isLookup service endpoint</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>outputPath</name>
|
||||||
|
<description>the output path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>accessToken</name>
|
||||||
|
<description>the access token used for the deposition in Zenodo</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>connectionUrl</name>
|
||||||
|
<description>the connection url for Zenodo</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>metadata</name>
|
||||||
|
<description> the metadata associated to the deposition</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>newDeposition</name>
|
||||||
|
<description>true if it is a brand new deposition. false for new version of an old deposition</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>conceptRecordId</name>
|
||||||
|
<description>for new version, the id of the record for the old deposition</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<description>the target hive database name</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<description>hive server jdbc url</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<description>hive server metastore URIs</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozieActionShareLibForSpark2</name>
|
||||||
|
<description>oozie action sharelib for spark 2.*</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2ExtraListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||||
|
<description>spark 2.* extra listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||||
|
<description>spark 2.* sql query execution listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<description>spark 2.* yarn history server address</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<description>spark 2.* event log dir location</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.job.queuename</name>
|
||||||
|
<value>${queueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||||
|
<value>${oozieLauncherQueueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
|
<start to="reset_outputpath"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<action name="reset_outputpath">
|
||||||
|
<fs>
|
||||||
|
<delete path="${outputPath}"/>
|
||||||
|
<mkdir path="${outputPath}"/>
|
||||||
|
</fs>
|
||||||
|
<ok to="save_community_map"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="save_community_map">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="fork_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<fork name="fork_dump">
|
||||||
|
<path start="dump_publication"/>
|
||||||
|
<path start="dump_dataset"/>
|
||||||
|
<path start="dump_orp"/>
|
||||||
|
<path start="dump_software"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
|
<action name="dump_publication">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table publication for community related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_dataset">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table dataset for community related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_orp">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table ORP for community related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_software">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table software for community related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<join name="join_dump" to="prepareResultProject"/>
|
||||||
|
|
||||||
|
<action name="prepareResultProject">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Prepare association result subset of project info</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="fork_extendWithProject"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<fork name="fork_extendWithProject">
|
||||||
|
<path start="extend_publication"/>
|
||||||
|
<path start="extend_dataset"/>
|
||||||
|
<path start="extend_orp"/>
|
||||||
|
<path start="extend_software"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
|
<action name="extend_publication">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Extend dumped publications with information about project</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/publication</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/ext/publication</arg>
|
||||||
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_extend"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="extend_dataset">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Extend dumped dataset with information about project</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/dataset</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/ext/dataset</arg>
|
||||||
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_extend"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<action name="extend_orp">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Extend dumped ORP with information about project</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/ext/orp</arg>
|
||||||
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_extend"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<action name="extend_software">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Extend dumped software with information about project</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/software</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/ext/software</arg>
|
||||||
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_extend"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<join name="join_extend" to="splitForCommunities"/>
|
||||||
|
|
||||||
|
<action name="splitForCommunities">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Split dumped result for community</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/ext</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/split</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="make_archive"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="make_archive">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
|
||||||
|
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||||
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/split</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="send_zenodo"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
<action name="send_zenodo">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
|
||||||
|
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||||
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||||
|
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||||
|
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
|
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||||
|
<arg>--newDeposition</arg><arg>${newDeposition}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
|
||||||
|
</workflow-app>
|
|
@ -0,0 +1,29 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequential file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "pip",
|
||||||
|
"paramLongName": "preparedInfoPath",
|
||||||
|
"paramDescription": "the path of the association result projectlist",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequential file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,542 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"definitions": {
|
||||||
|
"AccessRight": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||||
|
},
|
||||||
|
"label": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Label for the access mode"
|
||||||
|
},
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ControlledField": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"scheme": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The scheme for the resource"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "the value in the scheme"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"KeyValue": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"key": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of key"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Description of value"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Provenance": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"provenance": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The provenance of the information"
|
||||||
|
},
|
||||||
|
"trust": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The trust associated to the information"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"author": {
|
||||||
|
"description": "List of authors of the research results",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"affiliation": {
|
||||||
|
"description": "Affiliations of the author",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "One of the affiliations of the author"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fullname": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Fullname of the author"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "First name of the author"
|
||||||
|
},
|
||||||
|
"pid": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/ControlledField"},
|
||||||
|
{"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"provenance": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "The provenance of the author's pid"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Persistent identifier of the author (e.g. ORCID)"
|
||||||
|
},
|
||||||
|
"rank": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Order in which the author appears in the authors list"
|
||||||
|
},
|
||||||
|
"surname": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Surname of the author"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "One of the authors of the research result"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bestaccessright": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/AccessRight"},
|
||||||
|
{"description": "The most open access right associated to the manifestations of this research result"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"codeRepositoryUrl": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Only for results with type 'software': the URL to the repository with the source code"
|
||||||
|
},
|
||||||
|
"collectedfrom": {
|
||||||
|
"description": "Information about the sources from which the record has been collected",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/KeyValue"},
|
||||||
|
{"description": "Key is the OpenAIRE identifier of the data source, value is its name"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contactgroup": {
|
||||||
|
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contactperson": {
|
||||||
|
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"container": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"conferencedate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Date of the conference"
|
||||||
|
},
|
||||||
|
"conferenceplace": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Place of the conference"
|
||||||
|
},
|
||||||
|
"edition": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Edition of the journal or conference proceeding"
|
||||||
|
},
|
||||||
|
"ep": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "End page"
|
||||||
|
},
|
||||||
|
"iss": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Journal issue"
|
||||||
|
},
|
||||||
|
"issnLinking": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Journal linking issn"
|
||||||
|
},
|
||||||
|
"issnOnline": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Journal online issn"
|
||||||
|
},
|
||||||
|
"issnPrinted": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Journal printed issn"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Name of the journal or conference"
|
||||||
|
},
|
||||||
|
"sp": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Start page"
|
||||||
|
},
|
||||||
|
"vol": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Volume"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Container has information about the conference or journal where the result has been presented or published"
|
||||||
|
},
|
||||||
|
"context": {
|
||||||
|
"description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Code identifying the RI/RC"
|
||||||
|
},
|
||||||
|
"label": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Label of the RI/RC"
|
||||||
|
},
|
||||||
|
"provenance": {
|
||||||
|
"description": "Why this result is associated to the RI/RC.",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"}
|
||||||
|
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contributor": {
|
||||||
|
"description": "Contributors of this result",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"country": {
|
||||||
|
"description": "Country associated to this result",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "ISO 3166-1 alpha-2 country code"
|
||||||
|
},
|
||||||
|
"label": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "English label of the country"
|
||||||
|
},
|
||||||
|
"provenance": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "Why this result is associated to the country."}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"coverage": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"dateofcollection": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "When OpenAIRE collected the record the last time"
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"documentationUrl": {
|
||||||
|
"description": "Only for results with type 'software': URL to the software documentation",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"embargoenddate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Date when the embargo ends and this result turns Open Access"
|
||||||
|
},
|
||||||
|
"externalReference": {
|
||||||
|
"description": "Links to external resources like entries from thematic databases (e.g. Protein Data Bank)",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"provenance": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "Why this result is linked to the external resource"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"typology": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"format": {
|
||||||
|
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"geolocation": {
|
||||||
|
"description": "Geolocation information",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"box": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"place": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"point": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "OpenAIRE identifier"
|
||||||
|
},
|
||||||
|
"instance": {
|
||||||
|
"description": "Manifestations (i.e. different versions) of the result. For example: the pre-print and the published versions are two manifestations of the same research result",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"accessright": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/AccessRight"},
|
||||||
|
{"description": "Access right of this instance"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"collectedfrom": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/KeyValue"},
|
||||||
|
{"description": "Information about the source from which the instance has been collected. Key is the OpenAIRE identifier of the data source, value is its name"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"hostedby": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/KeyValue"},
|
||||||
|
{"description": "Information about the source from which the instance can be viewed or downloaded. Key is the OpenAIRE identifier of the data source, value is its name"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"license": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "License applied to the instance"
|
||||||
|
},
|
||||||
|
"publicationdate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Publication date of the instance"
|
||||||
|
},
|
||||||
|
"refereed": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Was the instance subject to peer-review? Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed' (see also https://api.openaire.eu/vocabularies/dnet:review_levels)"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Type of the instance. Possible values are listed at https://api.openaire.eu/vocabularies/dnet:publication_resource"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"description":"Location where the instance is accessible",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"language": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "alpha-3/ISO 639-2 code of the language"
|
||||||
|
},
|
||||||
|
"label": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "English label"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Timestamp of last update of the record in OpenAIRE"
|
||||||
|
},
|
||||||
|
"maintitle": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Title"
|
||||||
|
},
|
||||||
|
"originalId": {
|
||||||
|
"description": "Identifiers of the record at the original sources",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pid": {
|
||||||
|
"description": "Persistent identifiers of the result",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/ControlledField"},
|
||||||
|
{"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result "}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"programmingLanguage": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Only for results with type 'software': the programming language"
|
||||||
|
},
|
||||||
|
"projects": {
|
||||||
|
"description": "List of projects (i.e. grants) that (co-)funded the production of the research results",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"acronym": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Project acronym"
|
||||||
|
},
|
||||||
|
"code": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Grant code"
|
||||||
|
},
|
||||||
|
"funder": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"fundingStream": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)"
|
||||||
|
},
|
||||||
|
"jurisdiction": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Name of the funder"
|
||||||
|
},
|
||||||
|
"shortName": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Short name or acronym of the funder"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "Information about the funder funding the project"
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "OpenAIRE identifier of the project"
|
||||||
|
},
|
||||||
|
"provenance": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "Why this project is associated to the result"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Title of the project"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"publicationdate": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Date of publication"
|
||||||
|
},
|
||||||
|
"publisher": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Publisher"
|
||||||
|
},
|
||||||
|
"size": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Only for results with type 'dataset': the declared size of the dataset"
|
||||||
|
},
|
||||||
|
"source": {
|
||||||
|
"description": "See definition of Dublin Core field dc:source",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"subjects": {
|
||||||
|
"description": "Keywords associated to the result",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"provenance": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/Provenance"},
|
||||||
|
{"description": "Why this subject is associated to the result"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"subject": {
|
||||||
|
"allOf": [
|
||||||
|
{"$ref": "#/definitions/ControlledField"},
|
||||||
|
{"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary). "}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"subtitle": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Sub-title of the result"
|
||||||
|
},
|
||||||
|
"tool": {
|
||||||
|
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)"
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Version of the result"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"cmp",
|
||||||
|
"paramLongName":"communityMapPath",
|
||||||
|
"paramDescription": "the path to the serialization of the community map",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequential file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName":"nd",
|
||||||
|
"paramLongName":"newDeposition",
|
||||||
|
"paramDescription": "if it is a new deposition (true) or a new version (false)",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"cri",
|
||||||
|
"paramLongName":"conceptRecordId",
|
||||||
|
"paramDescription": "The id of the concept record for a new version",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"cmp",
|
||||||
|
"paramLongName":"communityMapPath",
|
||||||
|
"paramDescription": "the path to the serialization of the community map",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"hdfsp",
|
||||||
|
"paramLongName":"hdfsPath",
|
||||||
|
"paramDescription": "the path of the folder to find files to send to Zenodo",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "hdfsnn",
|
||||||
|
"paramLongName": "hdfsNameNode",
|
||||||
|
"paramDescription": "the name node",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "at",
|
||||||
|
"paramLongName": "accessToken",
|
||||||
|
"paramDescription": "the access token for the deposition",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"cu",
|
||||||
|
"paramLongName":"connectionUrl",
|
||||||
|
"paramDescription": "the url to connect to deposit",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"m",
|
||||||
|
"paramLongName":"metadata",
|
||||||
|
"paramDescription": "metadata associated to the deposition",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,24 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequential file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"is",
|
||||||
|
"paramLongName":"isLookUpUrl",
|
||||||
|
"paramDescription": "URL of the isLookUp Service",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "hdfs",
|
||||||
|
"paramLongName": "hdfsPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "nn",
|
||||||
|
"paramLongName": "hdfsNameNode",
|
||||||
|
"paramDescription": "the name node",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue