forked from D-Net/dnet-hadoop
Merge branch 'dump' of https://code-repo.d4science.org/miriam.baglioni/dnet-hadoop into resolve_conflicts_pr40_dump
This commit is contained in:
commit
5b994d7ccf
|
@ -87,6 +87,11 @@
|
|||
<groupId>org.postgresql</groupId>
|
||||
<artifactId>postgresql</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.squareup.okhttp3</groupId>
|
||||
<artifactId>okhttp</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import okhttp3.MediaType;
|
||||
import okhttp3.RequestBody;
|
||||
import okhttp3.internal.Util;
|
||||
import okio.BufferedSink;
|
||||
import okio.Okio;
|
||||
import okio.Source;
|
||||
|
||||
public class InputStreamRequestBody extends RequestBody {
|
||||
|
||||
private InputStream inputStream;
|
||||
private MediaType mediaType;
|
||||
private long lenght;
|
||||
|
||||
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
|
||||
|
||||
return new InputStreamRequestBody(inputStream, mediaType, len);
|
||||
}
|
||||
|
||||
private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
|
||||
this.inputStream = inputStream;
|
||||
this.mediaType = mediaType;
|
||||
this.lenght = len;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MediaType contentType() {
|
||||
return mediaType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long contentLength() {
|
||||
|
||||
return lenght;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(BufferedSink sink) throws IOException {
|
||||
Source source = null;
|
||||
try {
|
||||
source = Okio.source(inputStream);
|
||||
sink.writeAll(source);
|
||||
} finally {
|
||||
Util.closeQuietly(source);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
/**
 * Signals that the requested concept record id was not found among the depositions.
 *
 * <p>It is a checked {@link Exception} rather than a direct {@link Throwable} subclass, so that
 * generic {@code catch (Exception e)} handlers do not let it slip through.
 */
public class MissingConceptDoiException extends Exception {

    /**
     * @param message description of the concept record id that could not be resolved
     */
    public MissingConceptDoiException(String message) {
        super(message);
    }
}
|
|
@ -0,0 +1,266 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.IOException;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
|
||||
import okhttp3.*;
|
||||
|
||||
public class ZenodoAPIClient implements Serializable {
|
||||
|
||||
|
||||
String urlString;
|
||||
String bucket;
|
||||
|
||||
String deposition_id;
|
||||
String access_token;
|
||||
|
||||
|
||||
public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
|
||||
|
||||
public String getUrlString() {
|
||||
return urlString;
|
||||
}
|
||||
|
||||
public void setUrlString(String urlString) {
|
||||
this.urlString = urlString;
|
||||
}
|
||||
|
||||
public String getBucket() {
|
||||
return bucket;
|
||||
}
|
||||
|
||||
public void setBucket(String bucket) {
|
||||
this.bucket = bucket;
|
||||
}
|
||||
|
||||
public void setDeposition_id(String deposition_id){this.deposition_id = deposition_id;}
|
||||
|
||||
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
|
||||
|
||||
this.urlString = urlString;
|
||||
this.access_token = access_token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
*/
|
||||
public int newDeposition() throws IOException {
|
||||
String json = "{}";
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, json);
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.post(body)
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
// Get response body
|
||||
json = response.body().string();
|
||||
|
||||
ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class);
|
||||
this.bucket = newSubmission.getLinks().getBucket();
|
||||
this.deposition_id = newSubmission.getId();
|
||||
|
||||
return response.code();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload files in Zenodo.
|
||||
* @param is the inputStream for the file to upload
|
||||
* @param file_name the name of the file as it will appear on Zenodo
|
||||
* @param len the size of the file
|
||||
* @return the response code
|
||||
*/
|
||||
public int uploadIS(InputStream is, String file_name, long len) throws IOException {
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(bucket + "/" + file_name)
|
||||
.addHeader("Content-Type", "application/zip") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
return response.code();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Associates metadata information to the current deposition
|
||||
* @param metadata the metadata
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
*/
|
||||
public int sendMretadata(String metadata) throws IOException {
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, metadata);
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.put(body)
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
return response.code();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* To publish the current deposition. It works for both new deposition or new version of an old deposition
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
*/
|
||||
public int publish() throws IOException {
|
||||
|
||||
String json = "{}";
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id + "/actions/publish")
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
return response.code();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* To create a new version of an already published deposition.
|
||||
* It sets the deposition_id and the bucket to be used for the new version.
|
||||
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is
|
||||
* the last part of the url for the DOI Zenodo suggests to use to cite all versions:
|
||||
* DOI: 10.xxx/zenodo.656930 concept_rec_id = 656930
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
* @throws MissingConceptDoiException
|
||||
*/
|
||||
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
||||
setDepositionId(concept_rec_id);
|
||||
String json = "{}";
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
|
||||
String latest_draft = zenodoModel.getLinks().getLatest_draft();
|
||||
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
|
||||
bucket = getBucket(latest_draft);
|
||||
return response.code();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
||||
|
||||
ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
|
||||
|
||||
for(ZenodoModel zm : zenodoModelList){
|
||||
if (zm.getConceptrecid().equals(concept_rec_id)){
|
||||
deposition_id = zm.getId();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
|
||||
|
||||
}
|
||||
|
||||
private String getPrevDepositions() throws IOException {
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.get()
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
return response.body().string();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private String getBucket(String url) throws IOException {
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(url)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.get()
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
// Get response body
|
||||
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
|
||||
|
||||
|
||||
return zenodoModel.getLinks().getBucket();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
/**
 * A community a deposition belongs to, identified by its community identifier.
 *
 * <p>It is {@link java.io.Serializable} because {@code Metadata}, which is serializable, holds a
 * list of communities; otherwise Java serialization of a populated metadata object would fail.
 */
public class Community implements java.io.Serializable {

    private String identifier;

    /**
     * @return the community identifier, or {@code null} if not set
     */
    public String getIdentifier() {
        return identifier;
    }

    /**
     * @param identifier the community identifier
     */
    public void setIdentifier(String identifier) {
        this.identifier = identifier;
    }
}
|
|
@ -0,0 +1,47 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
/**
 * A creator of a deposition: name, affiliation and ORCID.
 *
 * <p>It is {@link java.io.Serializable} because {@code Metadata}, which is serializable, holds a
 * list of creators; otherwise Java serialization of a populated metadata object would fail.
 */
public class Creator implements java.io.Serializable {

    private String affiliation;
    private String name;
    private String orcid;

    public String getAffiliation() {
        return affiliation;
    }

    public void setAffiliation(String affiliation) {
        this.affiliation = affiliation;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getOrcid() {
        return orcid;
    }

    public void setOrcid(String orcid) {
        this.orcid = orcid;
    }

    /**
     * Creates a creator with the given values; any of them may be {@code null}.
     *
     * @param name the creator name
     * @param affiliation the creator affiliation
     * @param orcid the creator ORCID
     * @return the new creator
     */
    public static Creator newInstance(String name, String affiliation, String orcid) {
        Creator c = new Creator();
        // Fields start out null, so assigning a null argument leaves them unchanged;
        // no per-field null check is needed.
        c.name = name;
        c.affiliation = affiliation;
        c.orcid = orcid;
        return c;
    }
}
|
|
@ -0,0 +1,58 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import net.minidev.json.annotate.JsonIgnore;
|
||||
|
||||
/**
 * A file attached to a deposition: checksum, name, size and id.
 *
 * <p>NOTE(review): a {@code links} property was sketched here earlier (as commented-out code) and
 * is currently not mapped.
 */
public class File implements Serializable {

    private String checksum;
    private String filename;
    private long filesize;
    private String id;

    // The @JsonIgnore that used to sit above the commented-out links field has been removed:
    // with the field gone it attached to getChecksum() instead.

    public String getChecksum() {
        return checksum;
    }

    public void setChecksum(String checksum) {
        this.checksum = checksum;
    }

    public String getFilename() {
        return filename;
    }

    public void setFilename(String filename) {
        this.filename = filename;
    }

    public long getFilesize() {
        return filesize;
    }

    public void setFilesize(long filesize) {
        this.filesize = filesize;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }
}
|
|
@ -0,0 +1,23 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/** A grant referenced by a deposition, carrying only the grant id. */
public class Grant implements Serializable {

    private String id;

    /**
     * Factory for a grant with the given id.
     *
     * @param id the grant id, may be {@code null}
     * @return the new grant
     */
    public static Grant newInstance(String id) {
        final Grant grant = new Grant();
        grant.id = id;
        return grant;
    }

    /**
     * @return the grant id, or {@code null} if not set
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the grant id
     */
    public void setId(String id) {
        this.id = id;
    }
}
|
|
@ -0,0 +1,92 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * The links object of a deposition. The client in this package uses {@code bucket} as the upload
 * target and {@code latest_draft} as the URL of the draft created for a new version; the other
 * entries are plain string properties.
 */
public class Links implements Serializable {

    private String bucket;
    private String discard;
    private String edit;
    private String files;
    private String html;
    private String latest_draft;
    private String latest_draft_html;
    private String publish;
    private String self;

    /** @return the bucket link */
    public String getBucket() {
        return this.bucket;
    }

    /** @param bucket the bucket link */
    public void setBucket(String bucket) {
        this.bucket = bucket;
    }

    /** @return the discard link */
    public String getDiscard() {
        return this.discard;
    }

    /** @param discard the discard link */
    public void setDiscard(String discard) {
        this.discard = discard;
    }

    /** @return the edit link */
    public String getEdit() {
        return this.edit;
    }

    /** @param edit the edit link */
    public void setEdit(String edit) {
        this.edit = edit;
    }

    /** @return the files link */
    public String getFiles() {
        return this.files;
    }

    /** @param files the files link */
    public void setFiles(String files) {
        this.files = files;
    }

    /** @return the html link */
    public String getHtml() {
        return this.html;
    }

    /** @param html the html link */
    public void setHtml(String html) {
        this.html = html;
    }

    /** @return the latest_draft link */
    public String getLatest_draft() {
        return this.latest_draft;
    }

    /** @param latest_draft the latest_draft link */
    public void setLatest_draft(String latest_draft) {
        this.latest_draft = latest_draft;
    }

    /** @return the latest_draft_html link */
    public String getLatest_draft_html() {
        return this.latest_draft_html;
    }

    /** @param latest_draft_html the latest_draft_html link */
    public void setLatest_draft_html(String latest_draft_html) {
        this.latest_draft_html = latest_draft_html;
    }

    /** @return the publish link */
    public String getPublish() {
        return this.publish;
    }

    /** @param publish the publish link */
    public void setPublish(String publish) {
        this.publish = publish;
    }

    /** @return the self link */
    public String getSelf() {
        return this.self;
    }

    /** @param self the self link */
    public void setSelf(String self) {
        this.self = self;
    }
}
|
|
@ -0,0 +1,153 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public class Metadata implements Serializable {
|
||||
|
||||
private String access_right;
|
||||
private List<Community> communities;
|
||||
private List<Creator> creators;
|
||||
private String description;
|
||||
private String doi;
|
||||
private List<Grant> grants;
|
||||
private List<String> keywords;
|
||||
private String language;
|
||||
private String license;
|
||||
private PrereserveDoi prereserve_doi;
|
||||
private String publication_date;
|
||||
private List<String> references;
|
||||
private List<RelatedIdentifier> related_identifiers;
|
||||
private String title;
|
||||
private String upload_type;
|
||||
private String version;
|
||||
|
||||
public String getUpload_type() {
|
||||
return upload_type;
|
||||
}
|
||||
|
||||
public void setUpload_type(String upload_type) {
|
||||
this.upload_type = upload_type;
|
||||
}
|
||||
|
||||
public String getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public void setVersion(String version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
public String getAccess_right() {
|
||||
return access_right;
|
||||
}
|
||||
|
||||
public void setAccess_right(String access_right) {
|
||||
this.access_right = access_right;
|
||||
}
|
||||
|
||||
public List<Community> getCommunities() {
|
||||
return communities;
|
||||
}
|
||||
|
||||
public void setCommunities(List<Community> communities) {
|
||||
this.communities = communities;
|
||||
}
|
||||
|
||||
public List<Creator> getCreators() {
|
||||
return creators;
|
||||
}
|
||||
|
||||
public void setCreators(List<Creator> creators) {
|
||||
this.creators = creators;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public void setDescription(String description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
public String getDoi() {
|
||||
return doi;
|
||||
}
|
||||
|
||||
public void setDoi(String doi) {
|
||||
this.doi = doi;
|
||||
}
|
||||
|
||||
public List<Grant> getGrants() {
|
||||
return grants;
|
||||
}
|
||||
|
||||
public void setGrants(List<Grant> grants) {
|
||||
this.grants = grants;
|
||||
}
|
||||
|
||||
public List<String> getKeywords() {
|
||||
return keywords;
|
||||
}
|
||||
|
||||
public void setKeywords(List<String> keywords) {
|
||||
this.keywords = keywords;
|
||||
}
|
||||
|
||||
public String getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public void setLanguage(String language) {
|
||||
this.language = language;
|
||||
}
|
||||
|
||||
public String getLicense() {
|
||||
return license;
|
||||
}
|
||||
|
||||
public void setLicense(String license) {
|
||||
this.license = license;
|
||||
}
|
||||
|
||||
public PrereserveDoi getPrereserve_doi() {
|
||||
return prereserve_doi;
|
||||
}
|
||||
|
||||
public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
|
||||
this.prereserve_doi = prereserve_doi;
|
||||
}
|
||||
|
||||
public String getPublication_date() {
|
||||
return publication_date;
|
||||
}
|
||||
|
||||
public void setPublication_date(String publication_date) {
|
||||
this.publication_date = publication_date;
|
||||
}
|
||||
|
||||
public List<String> getReferences() {
|
||||
return references;
|
||||
}
|
||||
|
||||
public void setReferences(List<String> references) {
|
||||
this.references = references;
|
||||
}
|
||||
|
||||
public List<RelatedIdentifier> getRelated_identifiers() {
|
||||
return related_identifiers;
|
||||
}
|
||||
|
||||
public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
|
||||
this.related_identifiers = related_identifiers;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/** The {@code prereserve_doi} entry of the deposition metadata: a DOI and a record id. */
public class PrereserveDoi implements Serializable {

    private String doi;
    private String recid;

    /** @return the DOI, or {@code null} if not set */
    public String getDoi() {
        return this.doi;
    }

    /** @param doi the DOI */
    public void setDoi(String doi) {
        this.doi = doi;
    }

    /** @return the record id, or {@code null} if not set */
    public String getRecid() {
        return this.recid;
    }

    /** @param recid the record id */
    public void setRecid(String recid) {
        this.recid = recid;
    }
}
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * An entry of the {@code related_identifiers} metadata list: an identifier together with its
 * relation, resource type and scheme.
 */
public class RelatedIdentifier implements Serializable {

    private String identifier;
    private String relation;
    private String resource_type;
    private String scheme;

    /** @return the identifier, or {@code null} if not set */
    public String getIdentifier() {
        return this.identifier;
    }

    /** @param identifier the identifier */
    public void setIdentifier(String identifier) {
        this.identifier = identifier;
    }

    /** @return the relation, or {@code null} if not set */
    public String getRelation() {
        return this.relation;
    }

    /** @param relation the relation */
    public void setRelation(String relation) {
        this.relation = relation;
    }

    /** @return the resource type, or {@code null} if not set */
    public String getResource_type() {
        return this.resource_type;
    }

    /** @param resource_type the resource type */
    public void setResource_type(String resource_type) {
        this.resource_type = resource_type;
    }

    /** @return the scheme, or {@code null} if not set */
    public String getScheme() {
        return this.scheme;
    }

    /** @param scheme the scheme */
    public void setScheme(String scheme) {
        this.scheme = scheme;
    }
}
|
|
@ -0,0 +1,118 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public class ZenodoModel implements Serializable {
|
||||
|
||||
private String conceptrecid;
|
||||
private String created;
|
||||
|
||||
private List<File> files;
|
||||
private String id;
|
||||
private Links links;
|
||||
private Metadata metadata;
|
||||
private String modified;
|
||||
private String owner;
|
||||
private String record_id;
|
||||
private String state;
|
||||
private boolean submitted;
|
||||
private String title;
|
||||
|
||||
public String getConceptrecid() {
|
||||
return conceptrecid;
|
||||
}
|
||||
|
||||
public void setConceptrecid(String conceptrecid) {
|
||||
this.conceptrecid = conceptrecid;
|
||||
}
|
||||
|
||||
public String getCreated() {
|
||||
return created;
|
||||
}
|
||||
|
||||
public void setCreated(String created) {
|
||||
this.created = created;
|
||||
}
|
||||
|
||||
public List<File> getFiles() {
|
||||
return files;
|
||||
}
|
||||
|
||||
public void setFiles(List<File> files) {
|
||||
this.files = files;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public Links getLinks() {
|
||||
return links;
|
||||
}
|
||||
|
||||
public void setLinks(Links links) {
|
||||
this.links = links;
|
||||
}
|
||||
|
||||
public Metadata getMetadata() {
|
||||
return metadata;
|
||||
}
|
||||
|
||||
public void setMetadata(Metadata metadata) {
|
||||
this.metadata = metadata;
|
||||
}
|
||||
|
||||
public String getModified() {
|
||||
return modified;
|
||||
}
|
||||
|
||||
public void setModified(String modified) {
|
||||
this.modified = modified;
|
||||
}
|
||||
|
||||
public String getOwner() {
|
||||
return owner;
|
||||
}
|
||||
|
||||
public void setOwner(String owner) {
|
||||
this.owner = owner;
|
||||
}
|
||||
|
||||
public String getRecord_id() {
|
||||
return record_id;
|
||||
}
|
||||
|
||||
public void setRecord_id(String record_id) {
|
||||
this.record_id = record_id;
|
||||
}
|
||||
|
||||
public String getState() {
|
||||
return state;
|
||||
}
|
||||
|
||||
public void setState(String state) {
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
public boolean isSubmitted() {
|
||||
return submitted;
|
||||
}
|
||||
|
||||
public void setSubmitted(boolean submitted) {
|
||||
this.submitted = submitted;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class ZenodoModelList extends ArrayList<ZenodoModel> {
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@Disabled
|
||||
public class ZenodoAPIClientTest {
|
||||
|
||||
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
||||
private final String ACCESS_TOKEN = "";
|
||||
|
||||
private final String CONCEPT_REC_ID = "657113";
|
||||
|
||||
@Test
|
||||
public void testNewDeposition() throws IOException {
|
||||
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
Assertions.assertEquals(201, client.newDeposition());
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
|
||||
|
||||
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
|
||||
|
||||
Assertions.assertEquals(200, client.sendMretadata(metadata));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
||||
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
|
||||
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/newVersion")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
||||
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
|
||||
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"metadata":{"access_right":"open","communities":[{"identifier":"openaire-research-graph"}],"creators":[{"affiliation":"ISTI - CNR","name":"Bardi, Alessia","orcid":"0000-0002-1112-1292"},{"affiliation":"eifl", "name":"Kuchma, Iryna"},{"affiliation":"BIH", "name":"Brobov, Evgeny"},{"affiliation":"GIDIF RBM", "name":"Truccolo, Ivana"},{"affiliation":"unesp", "name":"Monteiro, Elizabete"},{"affiliation":"und", "name":"Casalegno, Carlotta"},{"affiliation":"CARL ABRC", "name":"Clary, Erin"},{"affiliation":"The University of Edimburgh", "name":"Romanowski, Andrew"},{"affiliation":"ISTI - CNR", "name":"Pavone, Gina"},{"affiliation":"ISTI - CNR", "name":"Artini, Michele"},{"affiliation":"ISTI - CNR","name":"Atzori, Claudio","orcid":"0000-0001-9613-6639"},{"affiliation":"University of Bielefeld","name":"Bäcker, Amelie","orcid":"0000-0001-6015-2063"},{"affiliation":"ISTI - CNR","name":"Baglioni, Miriam","orcid":"0000-0002-2273-9004"},{"affiliation":"University of Bielefeld","name":"Czerniak, Andreas","orcid":"0000-0003-3883-4169"},{"affiliation":"ISTI - CNR","name":"De Bonis, Michele"},{"affiliation":"Athena Research and Innovation Centre","name":"Dimitropoulos, Harry"},{"affiliation":"Athena Research and Innovation Centre","name":"Foufoulas, Ioannis"},{"affiliation":"University of Warsaw","name":"Horst, Marek"},{"affiliation":"Athena Research and Innovation Centre","name":"Iatropoulou, Katerina"},{"affiliation":"University of Warsaw","name":"Jacewicz, Przemyslaw"},{"affiliation":"Athena Research and Innovation Centre","name":"Kokogiannaki, Argiro", "orcid":"0000-0002-3880-0244"},{"affiliation":"ISTI - CNR","name":"La Bruzzo, Sandro","orcid":"0000-0003-2855-1245"},{"affiliation":"ISTI - CNR","name":"Lazzeri, Emma"},{"affiliation":"University of Bielefeld","name":"Löhden, Aenne"},{"affiliation":"ISTI - CNR","name":"Manghi, Paolo","orcid":"0000-0001-7291-3210"},{"affiliation":"ISTI - CNR","name":"Mannocci, Andrea","orcid":"0000-0002-5193-7851"},{"affiliation":"Athena Research 
and Innovation Center","name":"Manola, Natalia"},{"affiliation":"ISTI - CNR","name":"Ottonello, Enrico"},{"affiliation":"University of Bielefeld","name":"Shirrwagen, Jochen"}],"description":"\\u003cp\\u003eThis dump provides access to the metadata records of publications, research data, software and projects that may be relevant to the Corona Virus Disease (COVID-19) fight. The dump contains records of the OpenAIRE COVID-19 Gateway (https://covid-19.openaire.eu/), identified via full-text mining and inference techniques applied to the OpenAIRE Research Graph (https://explore.openaire.eu/). The Graph is one of the largest Open Access collections of metadata records and links between publications, datasets, software, projects, funders, and organizations, aggregating 12,000+ scientific data sources world-wide, among which the Covid-19 data sources Zenodo COVID-19 Community, WHO (World Health Organization), BIP! FInder for COVID-19, Protein Data Bank, Dimensions, scienceOpen, and RSNA. \\u003cp\\u003eThe dump consists of a gzip file containing one json per line. Each json is compliant to the schema available at https://doi.org/10.5281/zenodo.3974226\\u003c/p\\u003e ","title":"OpenAIRE Covid-19 publications, datasets, software and projects metadata.","upload_type":"dataset","version":"1.0"}}
|
|
@ -0,0 +1 @@
|
|||
This is a test for a new deposition
|
|
@ -0,0 +1 @@
|
|||
This is a test for a new version of an old deposition
|
|
@ -0,0 +1,2 @@
|
|||
This is a test for a new version of an old deposition. This should replace the other new version. I expect to have only two
|
||||
files in the deposition
|
|
@ -79,6 +79,15 @@ public class ModelSupport {
|
|||
entityIdPrefix.put("result", "50");
|
||||
}
|
||||
|
||||
/**
 * Maps a two-digit id prefix to the name of the entity type it denotes.
 * NOTE(review): looks like the inverse of entityIdPrefix - confirm the two maps are kept in sync.
 */
public static final Map<String, String> idPrefixEntity = Maps.newHashMap();

static {
    idPrefixEntity.put("10", "datasource");
    idPrefixEntity.put("20", "organization");
    idPrefixEntity.put("40", "project");
    idPrefixEntity.put("50", "result");
}
|
||||
|
||||
public static final Map<String, RelationInverse> relationInverseMap = Maps.newHashMap();
|
||||
|
||||
static {
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Article Processing Charge (APC) information. Not dumped in this release.
 * <p>
 * It holds two values, both kept as strings:
 * <ul>
 * <li>currency - the currency of the APC</li>
 * <li>amount - the charged amount</li>
 * </ul>
 */
public class APC implements Serializable {

    /** Currency of the APC. */
    private String currency;

    /** Charged amount. */
    private String amount;

    /**
     * @return the currency of the APC
     */
    public String getCurrency() {
        return this.currency;
    }

    /**
     * @param currency the currency of the APC
     */
    public void setCurrency(String currency) {
        this.currency = currency;
    }

    /**
     * @return the charged amount
     */
    public String getAmount() {
        return this.amount;
    }

    /**
     * @param amount the charged amount
     */
    public void setAmount(String amount) {
        this.amount = amount;
    }
}
|
|
@ -0,0 +1,31 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
/**
|
||||
* AccessRight. Used to represent the result access rights. It extends the eu.dnet.lib.dhp.schema.dump.oaf.Qualifier
|
||||
* element with a parameter scheme of type String to store the scheme. Values for this element are found against the
|
||||
* COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get
|
||||
* the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the
|
||||
* COAR code will be used to set the label parameter. The scheme value will always be the one referring to the COAR
|
||||
* access right scheme
|
||||
*/
|
||||
public class AccessRight extends Qualifier {
|
||||
|
||||
private String scheme;
|
||||
|
||||
public String getScheme() {
|
||||
return scheme;
|
||||
}
|
||||
|
||||
public void setScheme(String scheme) {
|
||||
this.scheme = scheme;
|
||||
}
|
||||
|
||||
public static AccessRight newInstance(String code, String label, String scheme) {
|
||||
AccessRight ar = new AccessRight();
|
||||
ar.setCode(code);
|
||||
ar.setLabel(label);
|
||||
ar.setScheme(scheme);
|
||||
return ar;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Used to represent the generic author of the result. It has six parameters: - name of type String to store the given
|
||||
* name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of
|
||||
* type String to store the family name of the author. The value for this parameter corresponds to
|
||||
* eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for
|
||||
* this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank on
|
||||
* the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author
|
||||
* rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the
|
||||
* moment only ORCID identifiers will be dumped. - The id element is instantiated by using the following values in the
|
||||
* eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is
|
||||
* instantiated only if the dataInfo is set for the pid in the result to be dumped. The provenance element is
|
||||
* instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: *
|
||||
* dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust
|
||||
*/
|
||||
public class Author implements Serializable {
|
||||
|
||||
private String fullname;
|
||||
|
||||
private String name;
|
||||
|
||||
private String surname;
|
||||
|
||||
private Integer rank;
|
||||
|
||||
private Pid pid;
|
||||
|
||||
public String getFullname() {
|
||||
return fullname;
|
||||
}
|
||||
|
||||
public void setFullname(String fullname) {
|
||||
this.fullname = fullname;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getSurname() {
|
||||
return surname;
|
||||
}
|
||||
|
||||
public void setSurname(String surname) {
|
||||
this.surname = surname;
|
||||
}
|
||||
|
||||
public Integer getRank() {
|
||||
return rank;
|
||||
}
|
||||
|
||||
public void setRank(Integer rank) {
|
||||
this.rank = rank;
|
||||
}
|
||||
|
||||
public Pid getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(Pid pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,136 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
 * Information about the conference or journal where the result has been presented or published. Every parameter is
 * of type String and corresponds to the parameter with the same name in eu.dnetlib.dhp.schema.oaf.Journal:
 * <ul>
 * <li>name - the name of the journal or conference</li>
 * <li>issnPrinted - the journal printed issn</li>
 * <li>issnOnline - the journal online issn</li>
 * <li>issnLinking - the journal linking issn</li>
 * <li>ep - the end page</li>
 * <li>iss - the journal issue</li>
 * <li>sp - the start page</li>
 * <li>vol - the volume</li>
 * <li>edition - the edition of the journal or conference proceeding</li>
 * <li>conferenceplace - the place of the conference</li>
 * <li>conferencedate - the date of the conference</li>
 * </ul>
 */
public class Container implements Serializable {

    /** Name of the journal or conference. */
    private String name;

    /** Printed issn of the journal. */
    private String issnPrinted;

    /** Online issn of the journal. */
    private String issnOnline;

    /** Linking issn of the journal. */
    private String issnLinking;

    /** End page. */
    private String ep;

    /** Journal issue. */
    private String iss;

    /** Start page. */
    private String sp;

    /** Volume. */
    private String vol;

    /** Edition of the journal or conference proceeding. */
    private String edition;

    /** Place of the conference. */
    private String conferenceplace;

    /** Date of the conference. */
    private String conferencedate;

    /**
     * @return the name of the journal or conference
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the name of the journal or conference
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the printed issn
     */
    public String getIssnPrinted() {
        return this.issnPrinted;
    }

    /**
     * @param issnPrinted the printed issn
     */
    public void setIssnPrinted(String issnPrinted) {
        this.issnPrinted = issnPrinted;
    }

    /**
     * @return the online issn
     */
    public String getIssnOnline() {
        return this.issnOnline;
    }

    /**
     * @param issnOnline the online issn
     */
    public void setIssnOnline(String issnOnline) {
        this.issnOnline = issnOnline;
    }

    /**
     * @return the linking issn
     */
    public String getIssnLinking() {
        return this.issnLinking;
    }

    /**
     * @param issnLinking the linking issn
     */
    public void setIssnLinking(String issnLinking) {
        this.issnLinking = issnLinking;
    }

    /**
     * @return the end page
     */
    public String getEp() {
        return this.ep;
    }

    /**
     * @param ep the end page
     */
    public void setEp(String ep) {
        this.ep = ep;
    }

    /**
     * @return the journal issue
     */
    public String getIss() {
        return this.iss;
    }

    /**
     * @param iss the journal issue
     */
    public void setIss(String iss) {
        this.iss = iss;
    }

    /**
     * @return the start page
     */
    public String getSp() {
        return this.sp;
    }

    /**
     * @param sp the start page
     */
    public void setSp(String sp) {
        this.sp = sp;
    }

    /**
     * @return the volume
     */
    public String getVol() {
        return this.vol;
    }

    /**
     * @param vol the volume
     */
    public void setVol(String vol) {
        this.vol = vol;
    }

    /**
     * @return the edition of the journal or conference proceeding
     */
    public String getEdition() {
        return this.edition;
    }

    /**
     * @param edition the edition of the journal or conference proceeding
     */
    public void setEdition(String edition) {
        this.edition = edition;
    }

    /**
     * @return the place of the conference
     */
    public String getConferenceplace() {
        return this.conferenceplace;
    }

    /**
     * @param conferenceplace the place of the conference
     */
    public void setConferenceplace(String conferenceplace) {
        this.conferenceplace = conferenceplace;
    }

    /**
     * @return the date of the conference
     */
    public String getConferencedate() {
        return this.conferencedate;
    }

    /**
     * @param conferencedate the date of the conference
     */
    public void setConferencedate(String conferencedate) {
        this.conferencedate = conferencedate;
    }
}
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Information described by a scheme and a value in that scheme (i.e. pid). It has two parameters:
 * <ul>
 * <li>scheme of type String - the scheme</li>
 * <li>value of type String - the value in that scheme</li>
 * </ul>
 */
public class ControlledField implements Serializable {

    /** Scheme the value belongs to. */
    private String scheme;

    /** Value expressed in the scheme. */
    private String value;

    /**
     * @return the scheme
     */
    public String getScheme() {
        return this.scheme;
    }

    /**
     * @param scheme the scheme
     */
    public void setScheme(String scheme) {
        this.scheme = scheme;
    }

    /**
     * @return the value in the scheme
     */
    public String getValue() {
        return this.value;
    }

    /**
     * @param value the value in the scheme
     */
    public void setValue(String value) {
        this.value = value;
    }

    /**
     * Creates a controlled field with both of its values set.
     *
     * @param scheme the scheme to set
     * @param value  the value to set
     * @return a new ControlledField carrying the given scheme and value
     */
    public static ControlledField newInstance(String scheme, String value) {
        final ControlledField field = new ControlledField();
        field.scheme = scheme;
        field.value = value;
        return field;
    }
}
|
|
@ -0,0 +1,37 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
/**
|
||||
* Represents the country associated to this result. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a
|
||||
* provenance parameter of type eu.dnetlib.dhp.schema.dumo.oaf.Provenance. The country in not mapped if its value in the
|
||||
* result reprensented in the internal format is Unknown. The value for this element correspond to: - code corresponds
|
||||
* to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of
|
||||
* eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be
|
||||
* dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with
|
||||
* datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust
|
||||
*/
|
||||
public class Country extends Qualifier {
|
||||
|
||||
private Provenance provenance;
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public static Country newInstance(String code, String label, Provenance provenance) {
|
||||
Country c = new Country();
|
||||
c.setProvenance(provenance);
|
||||
c.setCode(code);
|
||||
c.setLabel(label);
|
||||
return c;
|
||||
}
|
||||
|
||||
public static Country newInstance(String code, String label, String provenance, String trust) {
|
||||
return newInstance(code, label, Provenance.newInstance(provenance, trust));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Funder described by three String parameters: shortName, name and jurisdiction.
 */
public class Funder implements Serializable {

    /** Short name of the funder. */
    private String shortName;

    /** Name of the funder. */
    private String name;

    /** Jurisdiction of the funder. */
    private String jurisdiction;

    /**
     * @return the short name of the funder
     */
    public String getShortName() {
        return this.shortName;
    }

    /**
     * @param shortName the short name of the funder
     */
    public void setShortName(String shortName) {
        this.shortName = shortName;
    }

    /**
     * @return the name of the funder
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the name of the funder
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the jurisdiction of the funder
     */
    public String getJurisdiction() {
        return this.jurisdiction;
    }

    /**
     * @param jurisdiction the jurisdiction of the funder
     */
    public void setJurisdiction(String jurisdiction) {
        this.jurisdiction = jurisdiction;
    }
}
|
|
@ -0,0 +1,53 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
/**
|
||||
* Represents the geolocation information. It has three parameters: - point of type String to store the point
|
||||
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box ot type String to store the box
|
||||
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place
|
||||
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place
|
||||
*/
|
||||
public class GeoLocation implements Serializable {
|
||||
|
||||
private String point;
|
||||
|
||||
private String box;
|
||||
|
||||
private String place;
|
||||
|
||||
public String getPoint() {
|
||||
return point;
|
||||
}
|
||||
|
||||
public void setPoint(String point) {
|
||||
this.point = point;
|
||||
}
|
||||
|
||||
public String getBox() {
|
||||
return box;
|
||||
}
|
||||
|
||||
public void setBox(String box) {
|
||||
this.box = box;
|
||||
}
|
||||
|
||||
public String getPlace() {
|
||||
return place;
|
||||
}
|
||||
|
||||
public void setPlace(String place) {
|
||||
this.place = place;
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public boolean isBlank() {
|
||||
return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published
|
||||
* versions are two manifestations of the same research result. It has the following parameters: - license of type
|
||||
* String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be
|
||||
* dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. -
|
||||
* type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
|
||||
* (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - hostedby of
|
||||
* type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance can be
|
||||
* viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and - key corresponds
|
||||
* to hostedby.key - value corresponds to hostedby.value - url of type List<String> list of locations where the instance
|
||||
* is accessible. It corresponds to url of the instance to be dumped - collectedfrom of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been
|
||||
* collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to
|
||||
* collectedfrom.key - value corresponds to collectedfrom.value - publicationdate of type String to store the
|
||||
* publication date of the instance ;// dateofacceptance; - refereed of type String to store information abour tthe
|
||||
* review status of the instance. Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed'. It corresponds to
|
||||
* refereed.classname of the instance to be dumped
|
||||
*/
|
||||
public class Instance implements Serializable {
|
||||
|
||||
private String license;
|
||||
|
||||
private AccessRight accessright;
|
||||
|
||||
private String type;
|
||||
|
||||
private KeyValue hostedby;
|
||||
|
||||
private List<String> url;
|
||||
|
||||
private KeyValue collectedfrom;
|
||||
|
||||
private String publicationdate;// dateofacceptance;
|
||||
|
||||
private String refereed; // peer-review status
|
||||
|
||||
public String getLicense() {
|
||||
return license;
|
||||
}
|
||||
|
||||
public void setLicense(String license) {
|
||||
this.license = license;
|
||||
}
|
||||
|
||||
public AccessRight getAccessright() {
|
||||
return accessright;
|
||||
}
|
||||
|
||||
public void setAccessright(AccessRight accessright) {
|
||||
this.accessright = accessright;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public KeyValue getHostedby() {
|
||||
return hostedby;
|
||||
}
|
||||
|
||||
public void setHostedby(KeyValue hostedby) {
|
||||
this.hostedby = hostedby;
|
||||
}
|
||||
|
||||
public List<String> getUrl() {
|
||||
return url;
|
||||
}
|
||||
|
||||
public void setUrl(List<String> url) {
|
||||
this.url = url;
|
||||
}
|
||||
|
||||
public KeyValue getCollectedfrom() {
|
||||
return collectedfrom;
|
||||
}
|
||||
|
||||
public void setCollectedfrom(KeyValue collectedfrom) {
|
||||
this.collectedfrom = collectedfrom;
|
||||
}
|
||||
|
||||
public String getPublicationdate() {
|
||||
return publicationdate;
|
||||
}
|
||||
|
||||
public void setPublicationdate(String publicationdate) {
|
||||
this.publicationdate = publicationdate;
|
||||
}
|
||||
|
||||
public String getRefereed() {
|
||||
return refereed;
|
||||
}
|
||||
|
||||
public void setRefereed(String refereed) {
|
||||
this.refereed = refereed;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
/**
|
||||
* To represent the information described by a key and a value. It has two parameters: - key to store the key (generally
|
||||
* the OpenAIRE id for some entity) - value to store the value (generally the OpenAIRE name for the key)
|
||||
*/
|
||||
public class KeyValue implements Serializable {
|
||||
|
||||
private String key;
|
||||
|
||||
private String value;
|
||||
|
||||
public String getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
public void setKey(String key) {
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
public String getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public void setValue(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public static KeyValue newInstance(String key, String value) {
|
||||
KeyValue inst = new KeyValue();
|
||||
inst.key = key;
|
||||
inst.value = value;
|
||||
return inst;
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public boolean isBlank() {
|
||||
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To represent the generic persistent identifier. It has two parameters: - id of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the scheme and value of the Persistent Identifier. -
|
||||
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information
|
||||
*/
|
||||
public class Pid implements Serializable {
|
||||
private ControlledField id;
|
||||
private Provenance provenance;
|
||||
|
||||
public ControlledField getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(ControlledField pid) {
|
||||
this.id = pid;
|
||||
}
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public static Pid newInstance(ControlledField pid, Provenance provenance) {
|
||||
Pid p = new Pid();
|
||||
p.id = pid;
|
||||
p.provenance = provenance;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
public static Pid newInstance(ControlledField pid) {
|
||||
Pid p = new Pid();
|
||||
p.id = pid;
|
||||
|
||||
return p;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Project described by four String parameters: id (the OpenAIRE id), code, acronym and title. The fields are
 * protected, so they are directly accessible to subclasses.
 */
public class Project implements Serializable {

    /** OpenAIRE id of the project. */
    protected String id;

    /** Code of the project. */
    protected String code;

    /** Acronym of the project. */
    protected String acronym;

    /** Title of the project. */
    protected String title;

    /**
     * @return the OpenAIRE id of the project
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the OpenAIRE id of the project
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return the code of the project
     */
    public String getCode() {
        return this.code;
    }

    /**
     * @param code the code of the project
     */
    public void setCode(String code) {
        this.code = code;
    }

    /**
     * @return the acronym of the project
     */
    public String getAcronym() {
        return this.acronym;
    }

    /**
     * @param acronym the acronym of the project
     */
    public void setAcronym(String acronym) {
        this.acronym = acronym;
    }

    /**
     * @return the title of the project
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @param title the title of the project
     */
    public void setTitle(String title) {
        this.title = title;
    }
}
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Indicates the process that produced (or provided) the information, and the trust associated to the information. It
 * has two parameters:
 * <ul>
 * <li>provenance of type String - the provenance of the information</li>
 * <li>trust of type String - the trust associated to the information</li>
 * </ul>
 */
public class Provenance implements Serializable {

    /** Provenance of the information. */
    private String provenance;

    /** Trust associated to the information. */
    private String trust;

    /**
     * @return the provenance of the information
     */
    public String getProvenance() {
        return provenance;
    }

    /**
     * @param provenance the provenance of the information
     */
    public void setProvenance(String provenance) {
        this.provenance = provenance;
    }

    /**
     * @return the trust associated to the information
     */
    public String getTrust() {
        return trust;
    }

    /**
     * @param trust the trust associated to the information
     */
    public void setTrust(String trust) {
        this.trust = trust;
    }

    /**
     * Creates a provenance with both of its values set.
     *
     * @param provenance the provenance of the information
     * @param trust      the trust associated to the information
     * @return a new Provenance carrying the given values
     */
    public static Provenance newInstance(String provenance, String trust) {
        Provenance p = new Provenance();
        p.provenance = provenance;
        p.trust = trust;
        return p;
    }

    /**
     * Returns the provenance immediately followed by the trust, with no separator. An unset (null) part is rendered
     * as the text "null" by string concatenation. The format is kept exactly as it was, because callers may rely on
     * it.
     *
     * @return the concatenation of provenance and trust
     */
    @Override
    public String toString() {
        return provenance + trust;
    }
}
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
/**
 * Information described by a code and a label. It has two parameters:
 * <ul>
 * <li>code - the code (generally the classid of the eu.dnetlib.dhp.schema.oaf.Qualifier element)</li>
 * <li>label - the label (generally the classname of the eu.dnetlib.dhp.schema.oaf.Qualifier element)</li>
 * </ul>
 */
public class Qualifier implements Serializable {

    /** The code: the classid in the Qualifier. */
    private String code;

    /** The label: the classname in the Qualifier. */
    private String label;

    /**
     * @return the code
     */
    public String getCode() {
        return this.code;
    }

    /**
     * @param code the code
     */
    public void setCode(String code) {
        this.code = code;
    }

    /**
     * @return the label
     */
    public String getLabel() {
        return this.label;
    }

    /**
     * @param label the label
     */
    public void setLabel(String label) {
        this.label = label;
    }

    /**
     * Creates a qualifier with both of its values set.
     *
     * @param code  the code to set
     * @param value the text stored as the label
     * @return a new Qualifier carrying the given code and label
     */
    public static Qualifier newInstance(String code, String value) {
        final Qualifier created = new Qualifier();
        created.code = code;
        created.label = value;
        return created;
    }
}
|
|
@ -0,0 +1,391 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||
|
||||
/**
|
||||
* To represent the dumped result. It will be extended in the dump for Research Communities - Research
|
||||
* Initiative/Infrastructures. It has the following parameters: - author of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.Author> to describe the authors of a result. For each author in the result
|
||||
* represented in the internal model one author in the external model is produced. - type of type String to represent
|
||||
* the category of the result. Possible values are publication, dataset, software, other. It corresponds to
|
||||
* resulttype.classname of the dumped result - language of type eu.dnetlib.dhp.schema.dump.oaf.Qualifier to store
|
||||
* information about the language of the result. It is dumped as - code corresponds to language.classid - value
|
||||
* corresponds to language.classname - country of type List<eu.dnetlib.dhp.schema.dump.oaf.Country> to store the country
|
||||
* list to which the result is associated. For each country in the result represented in the internal model one country
|
||||
* in the external model is produced - subjects of type List<eu.dnetlib.dhp.schema.dump.oaf.Subject> to store the subjects for
|
||||
* the result. For each subject in the result represented in the internal model one subject in the external model is
|
||||
* produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first
|
||||
* title in the result to be dumped having classid equal to "main title" - subtitle of type String to store the subtitle
|
||||
* of the result. It corresponds to the value of the first title in the result to be dumped having classid equal to
|
||||
* "subtitle" - description of type List<String> to store the description of the result. It corresponds to the list of
|
||||
* description.value in the result represented in the internal model - publicationdate of type String to store the
|
||||
* publication date. It corresponds to dateofacceptance.value in the result represented in the internal model -
|
||||
* publisher of type String to store information about the publisher. It corresponds to publisher.value of the result
|
||||
* represented in the internal model - embargoenddate of type String to store the embargo end date. It corresponds to
|
||||
* embargoenddate.value of the result represented in the internal model - source of type List<String> See definition of
|
||||
* Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal
|
||||
* model - format of type List<String> It corresponds to the list of format.value in the result represented in the
|
||||
* internal model - contributor of type List<String> to represent contributors for this result. It corresponds to the
|
||||
* list of contributor.value in the result represented in the internal model - coverage of type List<String>. It corresponds
|
||||
* to the list of coverage.value in the result represented in the internal model - bestaccessright of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store information about the most open access right associated to the
|
||||
* manifestations of this research results. It corresponds to the same parameter in the result represented in the
|
||||
* internal model - instance of type List<eu.dnetlib.dhp.schema.dump.oaf.Instance> to store all the instances associated
|
||||
* to the result. It corresponds to the same parameter in the result represented in the internal model - container of
|
||||
* type eu.dnetlib.dhp.schema.dump.oaf.Container (only for result of type publication). It corresponds to the parameter
|
||||
* journal of the result represented in the internal model - documentationUrl of type List<String> (only for results of
|
||||
* type software) to store the URLs to the software documentation. It corresponds to the list of documentationUrl.value
|
||||
* of the result represented in the internal model - codeRepositoryUrl of type String (only for results of type
|
||||
* software) to store the URL to the repository with the source code. It corresponds to codeRepositoryUrl.value of the
|
||||
* result represented in the internal model - programmingLanguage of type String (only for results of type software) to
|
||||
* store the programming language. It corresponds to programmingLanguage.classid of the result represented in the
|
||||
* internal model - contactperson of type List<String> (only for results of type other) to store the contact person for
|
||||
* this result. It corresponds to the list of contactperson.value of the result represented in the internal model -
|
||||
* contactgroup of type List<String> (only for results of type other) to store the information for the contact group. It
|
||||
* corresponds to the list of contactgroup.value of the result represented in the internal model - tool of type
|
||||
* List<String> (only for results of type other) to store information about tools useful for the interpretation and/or
|
||||
* re-use of the research product. It corresponds to the list of tool.value in the result represented in the internal
|
||||
* model - size of type String (only for results of type dataset) to store the size of the dataset. It corresponds to
|
||||
* size.value in the result represented in the internal model - version of type String (only for results of type
|
||||
* dataset) to store the version. It corresponds to version.value of the result represented in the internal model -
|
||||
* geolocation of type List<eu.dnetlib.dhp.schema.dump.oaf.GeoLocation> (only for results of type dataset) to store
|
||||
* geolocation information. For each geolocation element in the result represented in the internal model a GeoLocation
|
||||
* in the external model is produced - id of type String to store the OpenAIRE id of the result. It corresponds to the
|
||||
* id of the result represented in the internal model - originalId of type List<String> to store the original ids of the
|
||||
* result. It corresponds to the originalId of the result represented in the internal model - pid of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the persistent identifiers for the result. For each pid
|
||||
* in the results represented in the internal model one pid in the external model is produced. The value correspondence
|
||||
* is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model - value corresponds
|
||||
* to the pid.value of the result represented in the internal model - dateofcollection of type String to store
|
||||
* information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result
|
||||
* represented in the internal model - lastupdatetimestamp of type Long to store the timestamp of the last update of
|
||||
* the record. It corresponds to lastupdatetimestamp of the record represented in the internal model
|
||||
*/
|
||||
public class Result implements Serializable {
|
||||
|
||||
private List<Author> author;
|
||||
|
||||
// resulttype allows subclassing results into publications | datasets | software
|
||||
private String type; // resulttype
|
||||
|
||||
// common fields
|
||||
private Qualifier language;
|
||||
|
||||
private List<Country> country;
|
||||
|
||||
private List<Subject> subjects;
|
||||
|
||||
private String maintitle;
|
||||
|
||||
private String subtitle;
|
||||
|
||||
private List<String> description;
|
||||
|
||||
private String publicationdate; // dateofacceptance;
|
||||
|
||||
private String publisher;
|
||||
|
||||
private String embargoenddate;
|
||||
|
||||
private List<String> source;
|
||||
|
||||
private List<String> format;
|
||||
|
||||
private List<String> contributor;
|
||||
|
||||
private List<String> coverage;
|
||||
|
||||
private AccessRight bestaccessright;
|
||||
|
||||
private List<Instance> instance;
|
||||
|
||||
private Container container;// Journal
|
||||
|
||||
private List<String> documentationUrl; // software
|
||||
|
||||
private String codeRepositoryUrl; // software
|
||||
|
||||
private String programmingLanguage; // software
|
||||
|
||||
private List<String> contactperson; // orp
|
||||
|
||||
private List<String> contactgroup; // orp
|
||||
|
||||
private List<String> tool; // orp
|
||||
|
||||
private String size; // dataset
|
||||
|
||||
private String version; // dataset
|
||||
|
||||
private List<GeoLocation> geolocation; // dataset
|
||||
|
||||
private String id;
|
||||
|
||||
private List<String> originalId;
|
||||
|
||||
private List<ControlledField> pid;
|
||||
|
||||
private String dateofcollection;
|
||||
|
||||
private Long lastupdatetimestamp;
|
||||
|
||||
public Long getLastupdatetimestamp() {
|
||||
return lastupdatetimestamp;
|
||||
}
|
||||
|
||||
public void setLastupdatetimestamp(Long lastupdatetimestamp) {
|
||||
this.lastupdatetimestamp = lastupdatetimestamp;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getOriginalId() {
|
||||
return originalId;
|
||||
}
|
||||
|
||||
public void setOriginalId(List<String> originalId) {
|
||||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
public String getDateofcollection() {
|
||||
return dateofcollection;
|
||||
}
|
||||
|
||||
public void setDateofcollection(String dateofcollection) {
|
||||
this.dateofcollection = dateofcollection;
|
||||
}
|
||||
|
||||
public List<Author> getAuthor() {
|
||||
return author;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public Container getContainer() {
|
||||
return container;
|
||||
}
|
||||
|
||||
public void setContainer(Container container) {
|
||||
this.container = container;
|
||||
}
|
||||
|
||||
public void setAuthor(List<Author> author) {
|
||||
this.author = author;
|
||||
}
|
||||
|
||||
public Qualifier getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public void setLanguage(Qualifier language) {
|
||||
this.language = language;
|
||||
}
|
||||
|
||||
public List<Country> getCountry() {
|
||||
return country;
|
||||
}
|
||||
|
||||
public void setCountry(List<Country> country) {
|
||||
this.country = country;
|
||||
}
|
||||
|
||||
public List<Subject> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
public void setSubjects(List<Subject> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
|
||||
public String getMaintitle() {
|
||||
return maintitle;
|
||||
}
|
||||
|
||||
public void setMaintitle(String maintitle) {
|
||||
this.maintitle = maintitle;
|
||||
}
|
||||
|
||||
public String getSubtitle() {
|
||||
return subtitle;
|
||||
}
|
||||
|
||||
public void setSubtitle(String subtitle) {
|
||||
this.subtitle = subtitle;
|
||||
}
|
||||
|
||||
public List<String> getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public void setDescription(List<String> description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
public String getPublicationdate() {
|
||||
return publicationdate;
|
||||
}
|
||||
|
||||
public void setPublicationdate(String publicationdate) {
|
||||
this.publicationdate = publicationdate;
|
||||
}
|
||||
|
||||
public String getPublisher() {
|
||||
return publisher;
|
||||
}
|
||||
|
||||
public void setPublisher(String publisher) {
|
||||
this.publisher = publisher;
|
||||
}
|
||||
|
||||
public String getEmbargoenddate() {
|
||||
return embargoenddate;
|
||||
}
|
||||
|
||||
public void setEmbargoenddate(String embargoenddate) {
|
||||
this.embargoenddate = embargoenddate;
|
||||
}
|
||||
|
||||
public List<String> getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public void setSource(List<String> source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
public List<String> getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public void setFormat(List<String> format) {
|
||||
this.format = format;
|
||||
}
|
||||
|
||||
public List<String> getContributor() {
|
||||
return contributor;
|
||||
}
|
||||
|
||||
public void setContributor(List<String> contributor) {
|
||||
this.contributor = contributor;
|
||||
}
|
||||
|
||||
public List<String> getCoverage() {
|
||||
return coverage;
|
||||
}
|
||||
|
||||
public void setCoverage(List<String> coverage) {
|
||||
this.coverage = coverage;
|
||||
}
|
||||
|
||||
public AccessRight getBestaccessright() {
|
||||
return bestaccessright;
|
||||
}
|
||||
|
||||
public void setBestaccessright(AccessRight bestaccessright) {
|
||||
this.bestaccessright = bestaccessright;
|
||||
}
|
||||
|
||||
public List<Instance> getInstance() {
|
||||
return instance;
|
||||
}
|
||||
|
||||
public void setInstance(List<Instance> instance) {
|
||||
this.instance = instance;
|
||||
}
|
||||
|
||||
public List<String> getDocumentationUrl() {
|
||||
return documentationUrl;
|
||||
}
|
||||
|
||||
public void setDocumentationUrl(List<String> documentationUrl) {
|
||||
this.documentationUrl = documentationUrl;
|
||||
}
|
||||
|
||||
public String getCodeRepositoryUrl() {
|
||||
return codeRepositoryUrl;
|
||||
}
|
||||
|
||||
public void setCodeRepositoryUrl(String codeRepositoryUrl) {
|
||||
this.codeRepositoryUrl = codeRepositoryUrl;
|
||||
}
|
||||
|
||||
public String getProgrammingLanguage() {
|
||||
return programmingLanguage;
|
||||
}
|
||||
|
||||
public void setProgrammingLanguage(String programmingLanguage) {
|
||||
this.programmingLanguage = programmingLanguage;
|
||||
}
|
||||
|
||||
public List<String> getContactperson() {
|
||||
return contactperson;
|
||||
}
|
||||
|
||||
public void setContactperson(List<String> contactperson) {
|
||||
this.contactperson = contactperson;
|
||||
}
|
||||
|
||||
public List<String> getContactgroup() {
|
||||
return contactgroup;
|
||||
}
|
||||
|
||||
public void setContactgroup(List<String> contactgroup) {
|
||||
this.contactgroup = contactgroup;
|
||||
}
|
||||
|
||||
public List<String> getTool() {
|
||||
return tool;
|
||||
}
|
||||
|
||||
public void setTool(List<String> tool) {
|
||||
this.tool = tool;
|
||||
}
|
||||
|
||||
public String getSize() {
|
||||
return size;
|
||||
}
|
||||
|
||||
public void setSize(String size) {
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
public String getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public void setVersion(String version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
public List<GeoLocation> getGeolocation() {
|
||||
return geolocation;
|
||||
}
|
||||
|
||||
public void setGeolocation(List<GeoLocation> geolocation) {
|
||||
this.geolocation = geolocation;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To represent keywords associated to the result. It has two parameters: - subject of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to describe the subject. It mapped as: - schema it corresponds to
|
||||
* qualifier.classid of the dumped subject - value it corresponds to the subject value - provenance of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo
|
||||
* is not null. In this case: - provenance corresponds to dataInfo.provenanceaction.classname - trust corresponds to
|
||||
* dataInfo.trust
|
||||
*/
|
||||
public class Subject implements Serializable {
|
||||
private ControlledField subject;
|
||||
private Provenance provenance;
|
||||
|
||||
public ControlledField getSubject() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
public void setSubject(ControlledField subject) {
|
||||
this.subject = subject;
|
||||
}
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Result;
|
||||
|
||||
/**
|
||||
* extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Project> to store the list of projects related to the result. The
|
||||
* information is added after the result is mapped to the external model - context of type
|
||||
* List<eu.dnetlib.dhp.schema/dump.oaf.community.Context> to store information about the RC RI related to the result.
|
||||
* For each context in the result represented in the internal model one context in the external model is produced -
|
||||
* collectedfrom of type List<eu.dnetliv.dhp.schema.dump.oaf.KeyValue> to store information about the sources from which
|
||||
* the record has been collected. For each collectedfrom in the result represented in the internal model one
|
||||
* collectedfrom in the external model is produced
|
||||
*/
|
||||
public class CommunityResult extends Result {
|
||||
|
||||
private List<Project> projects;
|
||||
|
||||
private List<Context> context;
|
||||
|
||||
protected List<KeyValue> collectedfrom;
|
||||
|
||||
public List<KeyValue> getCollectedfrom() {
|
||||
return collectedfrom;
|
||||
}
|
||||
|
||||
public void setCollectedfrom(List<KeyValue> collectedfrom) {
|
||||
this.collectedfrom = collectedfrom;
|
||||
}
|
||||
|
||||
public List<Project> getProjects() {
|
||||
return projects;
|
||||
}
|
||||
|
||||
public void setProjects(List<Project> projects) {
|
||||
this.projects = projects;
|
||||
}
|
||||
|
||||
public List<Context> getContext() {
|
||||
return context;
|
||||
}
|
||||
|
||||
public void setContext(List<Context> context) {
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
|
||||
|
||||
/**
|
||||
* Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with
|
||||
* OpenAIRE. It extend eu.dnetlib.dhp.shema.dump.oaf.Qualifier with a parameter provenance of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.Provenance> to store the provenances of the association between the result and
|
||||
* the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result
|
||||
* to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::"
|
||||
* will be used as value for code - label it corresponds to the label associated to the id. The information id taken
|
||||
* from the profile of the RC/RI - provenance it is set only if the dataInfo associated to the contenxt element of the
|
||||
* result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is
|
||||
* instantiated if the element datainfo.provenanceaction is not null. In this case - provenance corresponds to
|
||||
* dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust
|
||||
*/
|
||||
public class Context extends Qualifier {
|
||||
private List<Provenance> provenance;
|
||||
|
||||
public List<Provenance> getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(List<Provenance> provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
String provenance = new String();
|
||||
this.provenance.forEach(p -> provenance.concat(p.toString()));
|
||||
return Objects.hash(getCode(), getLabel(), provenance);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Stores information about the funder funding the project related to the result. It has the following parameters:
 * <ul>
 * <li>shortName, of type String, stores the funder short name (e.g. AKA)</li>
 * <li>name, of type String, stores the funder name (e.g. Academy of Finland)</li>
 * <li>fundingStream, of type String, stores the funding stream</li>
 * <li>jurisdiction, of type String, stores the jurisdiction of the funder</li>
 * </ul>
 */
public class Funder implements Serializable {

    private String shortName;

    private String name;

    private String fundingStream;

    private String jurisdiction;

    /** @return the funder short name */
    public String getShortName() {
        return this.shortName;
    }

    /** @param shortName the funder short name */
    public void setShortName(final String shortName) {
        this.shortName = shortName;
    }

    /** @return the funder name */
    public String getName() {
        return this.name;
    }

    /** @param name the funder name */
    public void setName(final String name) {
        this.name = name;
    }

    /** @return the funding stream */
    public String getFundingStream() {
        return this.fundingStream;
    }

    /** @param fundingStream the funding stream */
    public void setFundingStream(final String fundingStream) {
        this.fundingStream = fundingStream;
    }

    /** @return the jurisdiction of the funder */
    public String getJurisdiction() {
        return this.jurisdiction;
    }

    /** @param jurisdiction the jurisdiction of the funder */
    public void setJurisdiction(final String jurisdiction) {
        this.jurisdiction = jurisdiction;
    }
}
|
|
@ -0,0 +1,88 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
|
||||
/**
|
||||
* To store information about the project related to the result. This information is not directly mapped from the result
|
||||
* represented in the internal model because it is not there. The mapped result will be enriched with project
|
||||
* information derived by relation between results and projects. Project class has the following parameters: - id of
|
||||
* type String to store the OpenAIRE id for the Project - code of type String to store the grant agreement - acronym of
|
||||
* type String to store the acronym for the project - title of type String to store the title of the project - funder of
|
||||
* type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information about the funder funding the project -
|
||||
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store information about the. provenance of the
|
||||
* association between the result and the project
|
||||
*/
|
||||
public class Project implements Serializable {
|
||||
|
||||
private String id;// OpenAIRE id
|
||||
private String code;
|
||||
|
||||
private String acronym;
|
||||
|
||||
private String title;
|
||||
|
||||
private Funder funder;
|
||||
|
||||
private Provenance provenance;
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getCode() {
|
||||
return code;
|
||||
}
|
||||
|
||||
public void setCode(String code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
public String getAcronym() {
|
||||
return acronym;
|
||||
}
|
||||
|
||||
public void setAcronym(String acronym) {
|
||||
this.acronym = acronym;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public Funder getFunder() {
|
||||
return funder;
|
||||
}
|
||||
|
||||
public void setFunder(Funder funders) {
|
||||
this.funder = funders;
|
||||
}
|
||||
|
||||
public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
|
||||
Project project = new Project();
|
||||
project.setAcronym(acronym);
|
||||
project.setCode(code);
|
||||
project.setFunder(funder);
|
||||
project.setId(id);
|
||||
project.setTitle(title);
|
||||
return project;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
public class Constants implements Serializable {

    // Entity type names.
    public static final String RESULT_ENTITY = "result";
    public static final String DATASOURCE_ENTITY = "datasource";
    public static final String CONTEXT_ENTITY = "context";

    // Relation names.
    // collectedFrom goes with isProvidedBy -> taken from ModelSupport
    // community result: use isRelatedTo
    public static final String HOSTED_BY = "isHostedBy";
    public static final String HOSTS = "hosts";

    // Context identifier parts.
    public static final String CONTEXT_ID = "60";
    public static final String CONTEXT_NS_PREFIX = "context____";

}
|
|
@ -0,0 +1,316 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Container;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
|
||||
/**
|
||||
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
|
||||
* id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource
|
||||
* represented in the internal model - originalId of type List<String> to store the list of original ids associated to
|
||||
* the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The
|
||||
* null values are filtered out - pid of type List<eu.dnetlib.shp.schema.dump.oaf.ControlledField> to store the
|
||||
* persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid
|
||||
* in the external model is produced as : - schema corresponds to pid.qualifier.classid of the datasource represented in
|
||||
* the internal model - value corresponds to pid.value of the datasource represented in the internal model -
|
||||
* datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g.
|
||||
* pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It
|
||||
* corresponds to datasourcetype of the datasource represented in the internal model and : - code corresponds to
|
||||
* datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to
|
||||
* store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to).
|
||||
* It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname
|
||||
* of type Sgtring to store the official name of the datasource. It correspond to officialname.value of the datasource
|
||||
* represented in the internal model - englishname of type String to store the English name of the datasource. It
|
||||
* corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to
|
||||
* store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in
|
||||
* the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to
|
||||
* logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the data
|
||||
* of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the
|
||||
* datasource represented in the internal model - description of type String to store the description for the
|
||||
* datasource. It corresponds to description.value of the datasource represented in the internal model
|
||||
*/
|
||||
public class Datasource implements Serializable {
|
||||
|
||||
private String id; // string
|
||||
|
||||
private List<String> originalId; // list string
|
||||
|
||||
private List<ControlledField> pid; // list<String>
|
||||
|
||||
private ControlledField datasourcetype; // value
|
||||
|
||||
private String openairecompatibility; // value
|
||||
|
||||
private String officialname; // string
|
||||
|
||||
private String englishname; // string
|
||||
|
||||
private String websiteurl; // string
|
||||
|
||||
private String logourl; // string
|
||||
|
||||
private String dateofvalidation; // string
|
||||
|
||||
private String description; // description
|
||||
|
||||
private List<String> subjects; // List<String>
|
||||
|
||||
// opendoar specific fields (od*)
|
||||
|
||||
private List<String> languages; // odlanguages List<String>
|
||||
|
||||
private List<String> contenttypes; // odcontent types List<String>
|
||||
|
||||
// re3data fields
|
||||
private String releasestartdate; // string
|
||||
|
||||
private String releaseenddate; // string
|
||||
|
||||
private String missionstatementurl; // string
|
||||
|
||||
// {open, restricted or closed}
|
||||
private String accessrights; // databaseaccesstype string
|
||||
|
||||
// {open, restricted or closed}
|
||||
private String uploadrights; // datauploadtype string
|
||||
|
||||
// {feeRequired, registration, other}
|
||||
private String databaseaccessrestriction; // string
|
||||
|
||||
// {feeRequired, registration, other}
|
||||
private String datauploadrestriction; // string
|
||||
|
||||
private Boolean versioning; // boolean
|
||||
|
||||
private String citationguidelineurl; // string
|
||||
|
||||
// {yes, no, uknown}
|
||||
|
||||
private String pidsystems; // string
|
||||
|
||||
private String certificates; // string
|
||||
|
||||
private List<Object> policies; //
|
||||
|
||||
private Container journal; // issn etc del Journal
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getOriginalId() {
|
||||
return originalId;
|
||||
}
|
||||
|
||||
public void setOriginalId(List<String> originalId) {
|
||||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
public ControlledField getDatasourcetype() {
|
||||
return datasourcetype;
|
||||
}
|
||||
|
||||
public void setDatasourcetype(ControlledField datasourcetype) {
|
||||
this.datasourcetype = datasourcetype;
|
||||
}
|
||||
|
||||
public String getOpenairecompatibility() {
|
||||
return openairecompatibility;
|
||||
}
|
||||
|
||||
public void setOpenairecompatibility(String openairecompatibility) {
|
||||
this.openairecompatibility = openairecompatibility;
|
||||
}
|
||||
|
||||
public String getOfficialname() {
|
||||
return officialname;
|
||||
}
|
||||
|
||||
public void setOfficialname(String officialname) {
|
||||
this.officialname = officialname;
|
||||
}
|
||||
|
||||
public String getEnglishname() {
|
||||
return englishname;
|
||||
}
|
||||
|
||||
public void setEnglishname(String englishname) {
|
||||
this.englishname = englishname;
|
||||
}
|
||||
|
||||
public String getWebsiteurl() {
|
||||
return websiteurl;
|
||||
}
|
||||
|
||||
public void setWebsiteurl(String websiteurl) {
|
||||
this.websiteurl = websiteurl;
|
||||
}
|
||||
|
||||
public String getLogourl() {
|
||||
return logourl;
|
||||
}
|
||||
|
||||
public void setLogourl(String logourl) {
|
||||
this.logourl = logourl;
|
||||
}
|
||||
|
||||
public String getDateofvalidation() {
|
||||
return dateofvalidation;
|
||||
}
|
||||
|
||||
public void setDateofvalidation(String dateofvalidation) {
|
||||
this.dateofvalidation = dateofvalidation;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public void setDescription(String description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
public List<String> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
public void setSubjects(List<String> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
|
||||
public List<String> getLanguages() {
|
||||
return languages;
|
||||
}
|
||||
|
||||
public void setLanguages(List<String> languages) {
|
||||
this.languages = languages;
|
||||
}
|
||||
|
||||
public List<String> getContenttypes() {
|
||||
return contenttypes;
|
||||
}
|
||||
|
||||
public void setContenttypes(List<String> contenttypes) {
|
||||
this.contenttypes = contenttypes;
|
||||
}
|
||||
|
||||
public String getReleasestartdate() {
|
||||
return releasestartdate;
|
||||
}
|
||||
|
||||
public void setReleasestartdate(String releasestartdate) {
|
||||
this.releasestartdate = releasestartdate;
|
||||
}
|
||||
|
||||
public String getReleaseenddate() {
|
||||
return releaseenddate;
|
||||
}
|
||||
|
||||
public void setReleaseenddate(String releaseenddate) {
|
||||
this.releaseenddate = releaseenddate;
|
||||
}
|
||||
|
||||
public String getMissionstatementurl() {
|
||||
return missionstatementurl;
|
||||
}
|
||||
|
||||
public void setMissionstatementurl(String missionstatementurl) {
|
||||
this.missionstatementurl = missionstatementurl;
|
||||
}
|
||||
|
||||
public String getAccessrights() {
|
||||
return accessrights;
|
||||
}
|
||||
|
||||
public void setAccessrights(String accessrights) {
|
||||
this.accessrights = accessrights;
|
||||
}
|
||||
|
||||
public String getUploadrights() {
|
||||
return uploadrights;
|
||||
}
|
||||
|
||||
public void setUploadrights(String uploadrights) {
|
||||
this.uploadrights = uploadrights;
|
||||
}
|
||||
|
||||
public String getDatabaseaccessrestriction() {
|
||||
return databaseaccessrestriction;
|
||||
}
|
||||
|
||||
public void setDatabaseaccessrestriction(String databaseaccessrestriction) {
|
||||
this.databaseaccessrestriction = databaseaccessrestriction;
|
||||
}
|
||||
|
||||
public String getDatauploadrestriction() {
|
||||
return datauploadrestriction;
|
||||
}
|
||||
|
||||
public void setDatauploadrestriction(String datauploadrestriction) {
|
||||
this.datauploadrestriction = datauploadrestriction;
|
||||
}
|
||||
|
||||
public Boolean getVersioning() {
|
||||
return versioning;
|
||||
}
|
||||
|
||||
public void setVersioning(Boolean versioning) {
|
||||
this.versioning = versioning;
|
||||
}
|
||||
|
||||
public String getCitationguidelineurl() {
|
||||
return citationguidelineurl;
|
||||
}
|
||||
|
||||
public void setCitationguidelineurl(String citationguidelineurl) {
|
||||
this.citationguidelineurl = citationguidelineurl;
|
||||
}
|
||||
|
||||
public String getPidsystems() {
|
||||
return pidsystems;
|
||||
}
|
||||
|
||||
public void setPidsystems(String pidsystems) {
|
||||
this.pidsystems = pidsystems;
|
||||
}
|
||||
|
||||
public String getCertificates() {
|
||||
return certificates;
|
||||
}
|
||||
|
||||
public void setCertificates(String certificates) {
|
||||
this.certificates = certificates;
|
||||
}
|
||||
|
||||
public List<Object> getPolicies() {
|
||||
return policies;
|
||||
}
|
||||
|
||||
public void setPolicies(List<Object> policiesr3) {
|
||||
this.policies = policiesr3;
|
||||
}
|
||||
|
||||
public Container getJournal() {
|
||||
return journal;
|
||||
}
|
||||
|
||||
public void setJournal(Container journal) {
|
||||
this.journal = journal;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To store information about the funder funding the project related to the result. It has the following parameters:
|
||||
* - private String shortName to store the short name of the funder (e.g. AKA)
|
||||
* - private String name to store information about the name of the funder (e.g. Akademy of Finland)
|
||||
* - private Fundings funding_stream to store the fundingstream
|
||||
* - private String jurisdiction to store information about the jurisdiction of the funder
|
||||
*/
|
||||
public class Funder implements Serializable {
|
||||
|
||||
private String shortName;
|
||||
|
||||
private String name;
|
||||
|
||||
private Fundings funding_stream;
|
||||
|
||||
private String jurisdiction;
|
||||
|
||||
public String getShortName() {
|
||||
return shortName;
|
||||
}
|
||||
|
||||
public void setShortName(String shortName) {
|
||||
this.shortName = shortName;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getJurisdiction() {
|
||||
return jurisdiction;
|
||||
}
|
||||
|
||||
public void setJurisdiction(String jurisdiction) {
|
||||
this.jurisdiction = jurisdiction;
|
||||
}
|
||||
|
||||
public Fundings getFunding_stream() {
|
||||
return funding_stream;
|
||||
}
|
||||
|
||||
public void setFunding_stream(Fundings funding_stream) {
|
||||
this.funding_stream = funding_stream;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Stores information about the funding stream. It has two parameters:
 * <ul>
 * <li>private String id to store the id of the funding stream. The id is created by appending the shortname of the
 * funder to the name of each level in the xml representing the funding stream. For example: if the funder is the
 * European Commission, the funding level 0 name is FP7, the funding level 1 name is SP3 and the funding level 2 name
 * is PEOPLE then the id will be: EC::FP7::SP3::PEOPLE</li>
 * <li>private String description to describe the funding stream. It is created by concatenating the description of
 * each funding level, so for the example above the description would be: SEVENTH FRAMEWORK PROGRAMME - SP3-People -
 * Marie-Curie Actions</li>
 * </ul>
 */
public class Fundings implements Serializable {

    private String id;

    private String description;

    /** @return the id of the funding stream */
    public String getId() {
        return this.id;
    }

    /** @param id the id of the funding stream */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the description of the funding stream */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description of the funding stream */
    public void setDescription(final String description) {
        this.description = description;
    }
}
|
|
@ -0,0 +1,56 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
 * Describes the funded amount. It has the following parameters:
 * <ul>
 * <li>private String currency to store the currency of the fund</li>
 * <li>private float totalcost to store the total cost of the project</li>
 * <li>private float fundedamount to store the funded amount by the funder</li>
 * </ul>
 */
public class Granted implements Serializable {

    private String currency;

    private float totalcost;

    private float fundedamount;

    /**
     * Builds a grant description with every field set.
     *
     * @param currency the currency of the fund
     * @param totalcost the total cost of the project
     * @param fundedamount the amount funded by the funder
     * @return the newly created instance
     */
    public static Granted newInstance(String currency, float totalcost, float fundedamount) {
        final Granted result = new Granted();
        result.setCurrency(currency);
        result.setTotalcost(totalcost);
        result.setFundedamount(fundedamount);
        return result;
    }

    /**
     * Builds a grant description without a total cost, which keeps the float default of zero.
     *
     * @param currency the currency of the fund
     * @param fundedamount the amount funded by the funder
     * @return the newly created instance
     */
    public static Granted newInstance(String currency, float fundedamount) {
        return newInstance(currency, 0f, fundedamount);
    }

    /** @return the currency of the fund */
    public String getCurrency() {
        return this.currency;
    }

    /** @param currency the currency of the fund */
    public void setCurrency(final String currency) {
        this.currency = currency;
    }

    /** @return the total cost of the project */
    public float getTotalcost() {
        return this.totalcost;
    }

    /** @param totalcost the total cost of the project */
    public void setTotalcost(final float totalcost) {
        this.totalcost = totalcost;
    }

    /** @return the amount funded by the funder */
    public float getFundedamount() {
        return this.fundedamount;
    }

    /** @param fundedamount the amount funded by the funder */
    public void setFundedamount(final float fundedamount) {
        this.fundedamount = fundedamount;
    }
}
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * To represent the generic node in a relation. It has the following parameters:
 * <ul>
 * <li>{@code id}: the openaire id of the entity in the relation</li>
 * <li>{@code type}: the type of the entity in the relation</li>
 * </ul>
 * Consider the generic relation between a Result R and a Project P: the node representing R will have as id the id
 * of R and as type result, while the node representing the project will have as id the id of the project and as
 * type project.
 */
public class Node implements Serializable {

    /** OpenAIRE id of the entity. */
    private String id;

    /** Type of the entity. */
    private String type;

    /** @return the openaire id of the entity */
    public String getId() {
        return this.id;
    }

    /** @param id the openaire id of the entity */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the type of the entity */
    public String getType() {
        return this.type;
    }

    /** @param type the type of the entity */
    public void setType(final String type) {
        this.type = type;
    }

    /**
     * Creates a node for the given entity.
     *
     * @param id the openaire id of the entity
     * @param type the type of the entity
     * @return the new node
     */
    public static Node newInstance(String id, String type) {
        final Node result = new Node();
        result.setId(id);
        result.setType(type);
        return result;
    }
}
|
|
@ -0,0 +1,89 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Country;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||
|
||||
|
||||
/**
|
||||
* To represent the generic organizaiton. It has the following parameters:
|
||||
* - private String legalshortname to store the legalshortname of the organizaiton
|
||||
* - private String legalname to store the legal name of the organization
|
||||
* - private String websiteurl to store the websiteurl of the organization
|
||||
* - private List<String> alternativenames to store the alternative names of the organization
|
||||
* - private Qualifier country to store the country of the organization
|
||||
* - private String id to store the id of the organization
|
||||
* - private List<ControlledField> pid to store the list of pids for the organization
|
||||
*/
|
||||
public class Organization implements Serializable {
|
||||
private String legalshortname;
|
||||
private String legalname;
|
||||
private String websiteurl;
|
||||
private List<String> alternativenames;
|
||||
private Qualifier country;
|
||||
private String id;
|
||||
private List<ControlledField> pid;
|
||||
|
||||
public String getLegalshortname() {
|
||||
return legalshortname;
|
||||
}
|
||||
|
||||
public void setLegalshortname(String legalshortname) {
|
||||
this.legalshortname = legalshortname;
|
||||
}
|
||||
|
||||
public String getLegalname() {
|
||||
return legalname;
|
||||
}
|
||||
|
||||
public void setLegalname(String legalname) {
|
||||
this.legalname = legalname;
|
||||
}
|
||||
|
||||
public String getWebsiteurl() {
|
||||
return websiteurl;
|
||||
}
|
||||
|
||||
public void setWebsiteurl(String websiteurl) {
|
||||
this.websiteurl = websiteurl;
|
||||
}
|
||||
|
||||
public List<String> getAlternativenames() {
|
||||
return alternativenames;
|
||||
}
|
||||
|
||||
public void setAlternativenames(List<String> alternativenames) {
|
||||
this.alternativenames = alternativenames;
|
||||
}
|
||||
|
||||
public Qualifier getCountry() {
|
||||
return country;
|
||||
}
|
||||
|
||||
public void setCountry(Qualifier country) {
|
||||
this.country = country;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * To store information about the ec programme for the project. It has the following parameters:
 * <ul>
 * <li>{@code code}: the code of the programme</li>
 * <li>{@code description}: the description of the programme</li>
 * </ul>
 */
public class Programme implements Serializable {

    /** Code of the programme. */
    private String code;

    /** Description of the programme. */
    private String description;

    /** @return the code of the programme */
    public String getCode() {
        return this.code;
    }

    /** @param code the code of the programme */
    public void setCode(final String code) {
        this.code = code;
    }

    /** @return the description of the programme */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description of the programme */
    public void setDescription(final String description) {
        this.description = description;
    }

    /**
     * Creates a programme with the given code and description.
     *
     * @param code the code of the programme
     * @param description the description of the programme
     * @return the new programme
     */
    public static Programme newInstance(String code, String description) {
        final Programme result = new Programme();
        result.setCode(code);
        result.setDescription(description);
        return result;
    }
}
|
|
@ -0,0 +1,195 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
|
||||
/**
|
||||
* This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump
|
||||
* of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and
|
||||
* Projects but we put the information about the Funder within the Project representation. We also removed the
|
||||
* collected from element from the Project. No relation between the Project and the Datasource entity from which it is
|
||||
* collected will be created. We will never create relations between Project and Datasource. In case some relation will
|
||||
* be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project,
|
||||
* project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to
|
||||
* 0. It has the following parameters:
|
||||
* - private String id to store the id of the project (OpenAIRE id)
|
||||
* - private String websiteurl to store the websiteurl of the project
|
||||
* - private String code to store the grant agreement of the project
|
||||
* - private String acronym to store the acronym of the project
|
||||
* - private String title to store the tile of the project
|
||||
* - private String startdate to store the start date
|
||||
* - private String enddate to store the end date
|
||||
* - private String callidentifier to store the call indentifier
|
||||
* - private String keywords to store the keywords
|
||||
* - private boolean openaccessmandateforpublications to store if the project must accomplish to the open access mandate
|
||||
* for publications. This value will be set to true if one of the field in the project represented in the internal model
|
||||
* is set to true
|
||||
* - private boolean openaccessmandatefordataset to store if the project must accomplish to the open access mandate for
|
||||
* dataset. It is set to the value in the corresponding filed of the project represented in the internal model
|
||||
* - private List<String> subject to store the list of subjects of the project
|
||||
* - private List<Funder> funding to store the list of funder of the project
|
||||
* - private String summary to store the summary of the project
|
||||
* - private Granted granted to store the granted amount
|
||||
* - private List<Programme> programme to store the list of programmes the project is related to
|
||||
*/
|
||||
|
||||
public class Project implements Serializable {
|
||||
private String id;
|
||||
|
||||
private String websiteurl;
|
||||
private String code;
|
||||
private String acronym;
|
||||
private String title;
|
||||
private String startdate;
|
||||
|
||||
private String enddate;
|
||||
|
||||
private String callidentifier;
|
||||
|
||||
private String keywords;
|
||||
|
||||
private boolean openaccessmandateforpublications;
|
||||
|
||||
private boolean openaccessmandatefordataset;
|
||||
private List<String> subject;
|
||||
|
||||
private List<Funder> funding;
|
||||
|
||||
private String summary;
|
||||
|
||||
private Granted granted;
|
||||
|
||||
private List<Programme> programme;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getWebsiteurl() {
|
||||
return websiteurl;
|
||||
}
|
||||
|
||||
public void setWebsiteurl(String websiteurl) {
|
||||
this.websiteurl = websiteurl;
|
||||
}
|
||||
|
||||
public String getCode() {
|
||||
return code;
|
||||
}
|
||||
|
||||
public void setCode(String code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
public String getAcronym() {
|
||||
return acronym;
|
||||
}
|
||||
|
||||
public void setAcronym(String acronym) {
|
||||
this.acronym = acronym;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public String getStartdate() {
|
||||
return startdate;
|
||||
}
|
||||
|
||||
public void setStartdate(String startdate) {
|
||||
this.startdate = startdate;
|
||||
}
|
||||
|
||||
public String getEnddate() {
|
||||
return enddate;
|
||||
}
|
||||
|
||||
public void setEnddate(String enddate) {
|
||||
this.enddate = enddate;
|
||||
}
|
||||
|
||||
public String getCallidentifier() {
|
||||
return callidentifier;
|
||||
}
|
||||
|
||||
public void setCallidentifier(String callidentifier) {
|
||||
this.callidentifier = callidentifier;
|
||||
}
|
||||
|
||||
public String getKeywords() {
|
||||
return keywords;
|
||||
}
|
||||
|
||||
public void setKeywords(String keywords) {
|
||||
this.keywords = keywords;
|
||||
}
|
||||
|
||||
public boolean isOpenaccessmandateforpublications() {
|
||||
return openaccessmandateforpublications;
|
||||
}
|
||||
|
||||
public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) {
|
||||
this.openaccessmandateforpublications = openaccessmandateforpublications;
|
||||
}
|
||||
|
||||
public boolean isOpenaccessmandatefordataset() {
|
||||
return openaccessmandatefordataset;
|
||||
}
|
||||
|
||||
public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) {
|
||||
this.openaccessmandatefordataset = openaccessmandatefordataset;
|
||||
}
|
||||
|
||||
public List<String> getSubject() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
public void setSubject(List<String> subject) {
|
||||
this.subject = subject;
|
||||
}
|
||||
|
||||
public List<Funder> getFunding() {
|
||||
return funding;
|
||||
}
|
||||
|
||||
public void setFunding(List<Funder> funding) {
|
||||
this.funding = funding;
|
||||
}
|
||||
|
||||
public String getSummary() {
|
||||
return summary;
|
||||
}
|
||||
|
||||
public void setSummary(String summary) {
|
||||
this.summary = summary;
|
||||
}
|
||||
|
||||
public Granted getGranted() {
|
||||
return granted;
|
||||
}
|
||||
|
||||
public void setGranted(Granted granted) {
|
||||
this.granted = granted;
|
||||
}
|
||||
|
||||
public List<Programme> getProgramme() {
|
||||
return programme;
|
||||
}
|
||||
|
||||
public void setProgramme(List<Programme> programme) {
|
||||
this.programme = programme;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * To represent the semantics of the generic relation between two entities. It has the following parameters:
 * <ul>
 * <li>{@code name}: the semantics of the relation (i.e. isAuthorInstitutionOf). It corresponds to the relclass
 * parameter in the relation represented in the internal model</li>
 * <li>{@code type}: the type of the relation (i.e. affiliation). It corresponds to the subreltype parameter of the
 * relation represented in the internal model</li>
 * </ul>
 */
public class RelType implements Serializable {

    /** Semantics of the relation (relclass). */
    private String name;

    /** Type of the relation (subreltype). */
    private String type;

    /** @return the semantics of the relation */
    public String getName() {
        return this.name;
    }

    /** @param name the semantics of the relation */
    public void setName(final String name) {
        this.name = name;
    }

    /** @return the type of the relation */
    public String getType() {
        return this.type;
    }

    /** @param type the type of the relation */
    public void setType(final String type) {
        this.type = type;
    }

    /**
     * Creates the semantics descriptor of a relation.
     *
     * @param name the semantics of the relation
     * @param type the type of the relation
     * @return the new descriptor
     */
    public static RelType newInstance(String name, String type) {
        final RelType result = new RelType();
        result.setName(name);
        result.setType(type);
        return result;
    }
}
|
|
@ -0,0 +1,68 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
|
||||
/**
|
||||
* To represent the gereric relation between two entities. It has the following parameters:
|
||||
* - private Node source to represent the entity source of the relation
|
||||
* - private Node target to represent the entity target of the relation
|
||||
* - private RelType reltype to represent the semantics of the relation
|
||||
* - private Provenance provenance to represent the provenance of the relation
|
||||
*/
|
||||
public class Relation implements Serializable {
|
||||
private Node source;
|
||||
private Node target;
|
||||
private RelType reltype;
|
||||
private Provenance provenance;
|
||||
|
||||
public Node getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public void setSource(Node source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
public Node getTarget() {
|
||||
return target;
|
||||
}
|
||||
|
||||
public void setTarget(Node target) {
|
||||
this.target = target;
|
||||
}
|
||||
|
||||
public RelType getReltype() {
|
||||
return reltype;
|
||||
}
|
||||
|
||||
public void setReltype(RelType reltype) {
|
||||
this.reltype = reltype;
|
||||
}
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName());
|
||||
}
|
||||
|
||||
public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) {
|
||||
Relation relation = new Relation();
|
||||
relation.source = source;
|
||||
relation.target = target;
|
||||
relation.reltype = reltype;
|
||||
relation.provenance = provenance;
|
||||
return relation;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* To represent RC entities. It extends eu.dnetlib.dhp.dump.oaf.grap.ResearchInitiative by adding the parameter subject
|
||||
* to store the list of subjects related to the community
|
||||
*/
|
||||
public class ResearchCommunity extends ResearchInitiative {
|
||||
private List<String> subject;
|
||||
|
||||
public List<String> getSubject() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
public void setSubject(List<String> subject) {
|
||||
this.subject = subject;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * To represent entity of type RC/RI. It has the following parameters, which are mostly derived by the profile:
 * <ul>
 * <li>{@code id}: the openaire id for the entity. It has as code 00 and will be created as
 * 00|context_____::md5(originalId)</li>
 * <li>{@code originalId}: the id of the context as provided in the profile (i.e. mes)</li>
 * <li>{@code name}: the name of the context (got from the label attribute in the context definition)</li>
 * <li>{@code type}: the type of the context (i.e.: research initiative or research community)</li>
 * <li>{@code description}: the description of the context as given in the profile</li>
 * <li>{@code zenodo_community}: the zenodo community associated to the context (main zenodo community)</li>
 * </ul>
 */
public class ResearchInitiative implements Serializable {

    /** OpenAIRE id of the entity. */
    private String id;

    /** Id of the context as provided in the profile. */
    private String originalId;

    /** Name of the context. */
    private String name;

    /** Type of the context: research initiative or research community. */
    private String type;

    /** Description of the context. */
    private String description;

    /** Zenodo community associated to the context. */
    private String zenodo_community;

    /** @return the openaire id of the entity */
    public String getId() {
        return this.id;
    }

    /** @param id the openaire id of the entity */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the id of the context as provided in the profile */
    public String getOriginalId() {
        return this.originalId;
    }

    /** @param originalId the id of the context as provided in the profile */
    public void setOriginalId(final String originalId) {
        this.originalId = originalId;
    }

    /** @return the name of the context */
    public String getName() {
        return this.name;
    }

    /** @param label the name of the context */
    public void setName(final String label) {
        this.name = label;
    }

    /** @return the type of the context */
    public String getType() {
        return this.type;
    }

    /** @param type the type of the context */
    public void setType(final String type) {
        this.type = type;
    }

    /** @return the description of the context */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description of the context */
    public void setDescription(final String description) {
        this.description = description;
    }

    /** @return the zenodo community associated to the context */
    public String getZenodo_community() {
        return this.zenodo_community;
    }

    /** @param zenodo_community the zenodo community associated to the context */
    public void setZenodo_community(final String zenodo_community) {
        this.zenodo_community = zenodo_community;
    }
}
|
|
@ -110,13 +110,6 @@ public class CommunityConfigurationFactory {
|
|||
}
|
||||
|
||||
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
|
||||
// final Node oacommunitynode = node.selectSingleNode("./oacommunity");
|
||||
// String oacommunity = null;
|
||||
// if (oacommunitynode != null) {
|
||||
// String tmp = oacommunitynode.getText();
|
||||
// if (StringUtils.isNotBlank(tmp))
|
||||
// oacommunity = tmp;
|
||||
// }
|
||||
|
||||
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
|
||||
final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>();
|
||||
|
@ -127,11 +120,7 @@ public class CommunityConfigurationFactory {
|
|||
|
||||
zenodoCommunityList.add(zc);
|
||||
}
|
||||
// if (oacommunity != null) {
|
||||
// ZenodoCommunity zc = new ZenodoCommunity();
|
||||
// zc.setZenodoCommunityId(oacommunity);
|
||||
// zenodoCommunityList.add(zc);
|
||||
// }
|
||||
|
||||
log.info("size of the zenodo community list " + zenodoCommunityList.size());
|
||||
return zenodoCommunityList;
|
||||
}
|
||||
|
|
|
@ -20,8 +20,6 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
|||
/** Created by miriam on 02/08/2018. */
|
||||
public class ResultTagger implements Serializable {
|
||||
|
||||
private String trust = "0.8";
|
||||
|
||||
private boolean clearContext(Result result) {
|
||||
int tmp = result.getContext().size();
|
||||
List<Context> clist = result
|
||||
|
@ -73,7 +71,8 @@ public class ResultTagger implements Serializable {
|
|||
final Set<String> subjects = new HashSet<>();
|
||||
|
||||
if (Objects.nonNull(result.getSubject())) {
|
||||
result.getSubject()
|
||||
result
|
||||
.getSubject()
|
||||
.stream()
|
||||
.map(subject -> subject.getValue())
|
||||
.filter(StringUtils::isNotBlank)
|
||||
|
@ -170,21 +169,24 @@ public class ResultTagger implements Serializable {
|
|||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_SUBJECT,
|
||||
CLASS_NAME_BULKTAG_SUBJECT));
|
||||
CLASS_NAME_BULKTAG_SUBJECT,
|
||||
TAGGING_TRUST));
|
||||
if (datasources.contains(c.getId()))
|
||||
dataInfoList
|
||||
.add(
|
||||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_DATASOURCE,
|
||||
CLASS_NAME_BULKTAG_DATASOURCE));
|
||||
CLASS_NAME_BULKTAG_DATASOURCE,
|
||||
TAGGING_TRUST));
|
||||
if (czenodo.contains(c.getId()))
|
||||
dataInfoList
|
||||
.add(
|
||||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_CZENODO,
|
||||
CLASS_NAME_BULKTAG_ZENODO));
|
||||
CLASS_NAME_BULKTAG_ZENODO,
|
||||
TAGGING_TRUST));
|
||||
}
|
||||
return c;
|
||||
})
|
||||
|
@ -210,21 +212,24 @@ public class ResultTagger implements Serializable {
|
|||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_SUBJECT,
|
||||
CLASS_NAME_BULKTAG_SUBJECT));
|
||||
CLASS_NAME_BULKTAG_SUBJECT,
|
||||
TAGGING_TRUST));
|
||||
if (datasources.contains(c))
|
||||
dataInfoList
|
||||
.add(
|
||||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_DATASOURCE,
|
||||
CLASS_NAME_BULKTAG_DATASOURCE));
|
||||
CLASS_NAME_BULKTAG_DATASOURCE,
|
||||
TAGGING_TRUST));
|
||||
if (czenodo.contains(c))
|
||||
dataInfoList
|
||||
.add(
|
||||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_CZENODO,
|
||||
CLASS_NAME_BULKTAG_ZENODO));
|
||||
CLASS_NAME_BULKTAG_ZENODO,
|
||||
TAGGING_TRUST));
|
||||
context.setDataInfo(dataInfoList);
|
||||
return context;
|
||||
})
|
||||
|
@ -235,11 +240,12 @@ public class ResultTagger implements Serializable {
|
|||
}
|
||||
|
||||
public static DataInfo getDataInfo(
|
||||
String inference_provenance, String inference_class_id, String inference_class_name) {
|
||||
String inference_provenance, String inference_class_id, String inference_class_name, String trust) {
|
||||
DataInfo di = new DataInfo();
|
||||
di.setInferred(true);
|
||||
di.setInferenceprovenance(inference_provenance);
|
||||
di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name));
|
||||
di.setTrust(trust);
|
||||
return di;
|
||||
}
|
||||
|
||||
|
|
|
@ -14,4 +14,6 @@ public class TaggingConstants {
|
|||
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
|
||||
public static final String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
|
||||
public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
|
||||
|
||||
public static final String TAGGING_TRUST = "0.8";
|
||||
}
|
||||
|
|
|
@ -106,12 +106,6 @@
|
|||
<subject>aqua</subject>
|
||||
<subject>sea</subject>
|
||||
</subjects>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="aginfra">
|
||||
|
|
|
@ -42,6 +42,12 @@
|
|||
|
||||
<dependencies>
|
||||
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-compress</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
|
@ -59,6 +65,12 @@
|
|||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-hive_2.11</artifactId>
|
||||
<scope>test</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
@ -92,14 +104,21 @@
|
|||
<groupId>org.postgresql</groupId>
|
||||
<artifactId>postgresql</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpmime</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.victools</groupId>
|
||||
<artifactId>jsonschema-generator</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.json4s</groupId>
|
||||
<artifactId>json4s-jackson_2.11</artifactId>
|
||||
<version>3.5.3</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
public class Constants {
|
||||
|
||||
public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
|
||||
public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
|
||||
|
||||
public static final String INFERRED = "Inferred by OpenAIRE";
|
||||
|
||||
public static final String HARVESTED = "Harvested";
|
||||
public static final String DEFAULT_TRUST = "0.9";
|
||||
public static final String USER_CLAIM = "Linked by user";;
|
||||
|
||||
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
||||
|
||||
public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";
|
||||
|
||||
public static String RESEARCH_COMMUNITY = "Research Community";
|
||||
|
||||
public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
|
||||
|
||||
public static String ORCID = "orcid";
|
||||
|
||||
static {
|
||||
accessRightsCoarMap.put("OPEN", "c_abf2");
|
||||
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
|
||||
accessRightsCoarMap.put("OPEN SOURCE", "c_abf2");
|
||||
accessRightsCoarMap.put("CLOSED", "c_14cb");
|
||||
accessRightsCoarMap.put("EMBARGO", "c_f1cf");
|
||||
}
|
||||
|
||||
static {
|
||||
coarCodeLabelMap.put("c_abf2", "OPEN");
|
||||
coarCodeLabelMap.put("c_16ec", "RESTRICTED");
|
||||
coarCodeLabelMap.put("c_14cb", "CLOSED");
|
||||
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
/**
|
||||
* It fires the execution of the actual dump for result entities. If the dump is for RC/RI products its checks for each
|
||||
* result its belongingess to at least one RC/RI before "asking" for its mapping.
|
||||
*/
|
||||
public class DumpProducts implements Serializable {
|
||||
|
||||
public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
|
||||
Class<? extends OafEntity> inputClazz,
|
||||
Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result> outputClazz,
|
||||
boolean graph) {
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
execDump(spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, graph);
|
||||
});
|
||||
}
|
||||
|
||||
public static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> void execDump(
|
||||
SparkSession spark,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
String communityMapPath,
|
||||
Class<I> inputClazz,
|
||||
Class<O> outputClazz,
|
||||
boolean graph) {
|
||||
|
||||
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
|
||||
|
||||
Utils
|
||||
.readPath(spark, inputPath, inputClazz)
|
||||
.map(value -> execMap(value, communityMap, graph), Encoders.bean(outputClazz))
|
||||
.filter(Objects::nonNull)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
|
||||
}
|
||||
|
||||
private static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O execMap(I value,
|
||||
CommunityMap communityMap,
|
||||
boolean graph) {
|
||||
|
||||
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
|
||||
if (odInfo.isPresent()) {
|
||||
if (odInfo.get().getDeletedbyinference()) {
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!graph) {
|
||||
Set<String> communities = communityMap.keySet();
|
||||
|
||||
Optional<List<Context>> inputContext = Optional
|
||||
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
|
||||
if (!inputContext.isPresent()) {
|
||||
return null;
|
||||
}
|
||||
List<String> toDumpFor = inputContext.get().stream().map(c -> {
|
||||
if (communities.contains(c.getId())) {
|
||||
return c.getId();
|
||||
}
|
||||
if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
|
||||
return c.getId().substring(0, 3);
|
||||
}
|
||||
return null;
|
||||
}).filter(Objects::nonNull).collect(Collectors.toList());
|
||||
if (toDumpFor.size() == 0) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return (O) ResultMapper.map(value, communityMap, graph);
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import org.apache.commons.compress.archivers.ar.ArArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||
|
||||
public class MakeTar implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MakeTar.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
MakeTar.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String outputPath = parser.get("hdfsPath");
|
||||
log.info("hdfsPath: {}", outputPath);
|
||||
|
||||
final String hdfsNameNode = parser.get("nameNode");
|
||||
log.info("nameNode: {}", hdfsNameNode);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("input path : {}", inputPath);
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
conf.set("fs.defaultFS", hdfsNameNode);
|
||||
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
|
||||
makeTArArchive(fileSystem, inputPath, outputPath);
|
||||
|
||||
}
|
||||
|
||||
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath) throws IOException {
|
||||
|
||||
RemoteIterator<LocatedFileStatus> dir_iterator = fileSystem.listLocatedStatus(new Path(inputPath));
|
||||
|
||||
while (dir_iterator.hasNext()) {
|
||||
LocatedFileStatus fileStatus = dir_iterator.next();
|
||||
|
||||
Path p = fileStatus.getPath();
|
||||
String p_string = p.toString();
|
||||
String entity = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||
|
||||
write(fileSystem, p_string, outputPath + "/" + entity + ".tar", entity);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
|
||||
throws IOException {
|
||||
|
||||
Path hdfsWritePath = new Path(outputPath);
|
||||
FSDataOutputStream fsDataOutputStream = null;
|
||||
if (fileSystem.exists(hdfsWritePath)) {
|
||||
fileSystem.delete(hdfsWritePath, true);
|
||||
|
||||
}
|
||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||
|
||||
TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
||||
|
||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||
.listFiles(
|
||||
new Path(inputPath), true);
|
||||
|
||||
while (fileStatusListIterator.hasNext()) {
|
||||
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
||||
|
||||
Path p = fileStatus.getPath();
|
||||
String p_string = p.toString();
|
||||
if (!p_string.endsWith("_SUCCESS")) {
|
||||
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name + ".json.gz");
|
||||
entry.setSize(fileStatus.getLen());
|
||||
ar.putArchiveEntry(entry);
|
||||
|
||||
InputStream is = fileSystem.open(fileStatus.getPath());
|
||||
|
||||
BufferedInputStream bis = new BufferedInputStream(is);
|
||||
|
||||
int count;
|
||||
byte data[] = new byte[1024];
|
||||
while ((count = bis.read(data, 0, data.length)) != -1) {
|
||||
ar.write(data, 0, count);
|
||||
}
|
||||
bis.close();
|
||||
ar.closeArchiveEntry();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ar.close();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.List;
|
||||
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.io.SAXReader;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
public class QueryInformationSystem {
|
||||
|
||||
private ISLookUpService isLookUp;
|
||||
|
||||
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
||||
+
|
||||
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
||||
" and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') "
|
||||
+
|
||||
" return " +
|
||||
"<community> " +
|
||||
"{$x//CONFIGURATION/context/@id}" +
|
||||
"{$x//CONFIGURATION/context/@label}" +
|
||||
"</community>";
|
||||
|
||||
public CommunityMap getCommunityMap()
|
||||
throws ISLookUpException, DocumentException {
|
||||
return getMap(isLookUp.quickSearchProfile(XQUERY));
|
||||
|
||||
}
|
||||
|
||||
public ISLookUpService getIsLookUp() {
|
||||
return isLookUp;
|
||||
}
|
||||
|
||||
public void setIsLookUp(ISLookUpService isLookUpService) {
|
||||
this.isLookUp = isLookUpService;
|
||||
}
|
||||
|
||||
private CommunityMap getMap(List<String> communityMap) throws DocumentException {
|
||||
final CommunityMap map = new CommunityMap();
|
||||
|
||||
for (String xml : communityMap) {
|
||||
final Document doc;
|
||||
doc = new SAXReader().read(new StringReader(xml));
|
||||
Element root = doc.getRootElement();
|
||||
map.put(root.attribute("id").getValue(), root.attribute("label").getValue());
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,523 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Context;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class ResultMapper implements Serializable {
|
||||
|
||||
public static <I extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
|
||||
I in, Map<String, String> communityMap, boolean graph) {
|
||||
|
||||
Result out;
|
||||
if (graph) {
|
||||
out = new Result();
|
||||
} else {
|
||||
out = new CommunityResult();
|
||||
}
|
||||
|
||||
eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
|
||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
|
||||
if (ort.isPresent()) {
|
||||
switch (ort.get().getClassid()) {
|
||||
case "publication":
|
||||
Optional<Journal> journal = Optional
|
||||
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
|
||||
if (journal.isPresent()) {
|
||||
Journal j = journal.get();
|
||||
Container c = new Container();
|
||||
c.setConferencedate(j.getConferencedate());
|
||||
c.setConferenceplace(j.getConferenceplace());
|
||||
c.setEdition(j.getEdition());
|
||||
c.setEp(j.getEp());
|
||||
c.setIss(j.getIss());
|
||||
c.setIssnLinking(j.getIssnLinking());
|
||||
c.setIssnOnline(j.getIssnOnline());
|
||||
c.setIssnPrinted(j.getIssnPrinted());
|
||||
c.setName(j.getName());
|
||||
c.setSp(j.getSp());
|
||||
c.setVol(j.getVol());
|
||||
out.setContainer(c);
|
||||
out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
|
||||
}
|
||||
break;
|
||||
case "dataset":
|
||||
eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
|
||||
Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue()));
|
||||
Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
|
||||
|
||||
out
|
||||
.setGeolocation(
|
||||
Optional
|
||||
.ofNullable(id.getGeolocation())
|
||||
.map(
|
||||
igl -> igl
|
||||
.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(gli -> {
|
||||
GeoLocation gl = new GeoLocation();
|
||||
gl.setBox(gli.getBox());
|
||||
gl.setPlace(gli.getPlace());
|
||||
gl.setPoint(gli.getPoint());
|
||||
return gl;
|
||||
})
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(null));
|
||||
|
||||
out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
|
||||
break;
|
||||
case "software":
|
||||
|
||||
eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
|
||||
Optional
|
||||
.ofNullable(is.getCodeRepositoryUrl())
|
||||
.ifPresent(value -> out.setCodeRepositoryUrl(value.getValue()));
|
||||
Optional
|
||||
.ofNullable(is.getDocumentationUrl())
|
||||
.ifPresent(
|
||||
value -> out
|
||||
.setDocumentationUrl(
|
||||
value
|
||||
.stream()
|
||||
.map(v -> v.getValue())
|
||||
.collect(Collectors.toList())));
|
||||
|
||||
Optional
|
||||
.ofNullable(is.getProgrammingLanguage())
|
||||
.ifPresent(value -> out.setProgrammingLanguage(value.getClassid()));
|
||||
|
||||
out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
|
||||
break;
|
||||
case "other":
|
||||
|
||||
eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input;
|
||||
out
|
||||
.setContactgroup(
|
||||
Optional
|
||||
.ofNullable(ir.getContactgroup())
|
||||
.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
|
||||
.orElse(null));
|
||||
|
||||
out
|
||||
.setContactperson(
|
||||
Optional
|
||||
.ofNullable(ir.getContactperson())
|
||||
.map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
|
||||
.orElse(null));
|
||||
out
|
||||
.setTool(
|
||||
Optional
|
||||
.ofNullable(ir.getTool())
|
||||
.map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
|
||||
.orElse(null));
|
||||
|
||||
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
Optional
|
||||
.ofNullable(input.getAuthor())
|
||||
.ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));
|
||||
|
||||
// I do not map Access Right UNKNOWN or OTHER
|
||||
|
||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
|
||||
if (oar.isPresent()) {
|
||||
if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
|
||||
String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
|
||||
out
|
||||
.setBestaccessright(
|
||||
AccessRight
|
||||
.newInstance(
|
||||
code,
|
||||
Constants.coarCodeLabelMap.get(code),
|
||||
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
||||
}
|
||||
}
|
||||
|
||||
final List<String> contributorList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getContributor())
|
||||
.ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
|
||||
out.setContributor(contributorList);
|
||||
|
||||
// List<Country> countryList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getCountry())
|
||||
.ifPresent(
|
||||
value -> out
|
||||
.setCountry(
|
||||
value
|
||||
.stream()
|
||||
.map(
|
||||
c -> {
|
||||
if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
|
||||
return null;
|
||||
}
|
||||
Country country = new Country();
|
||||
country.setCode(c.getClassid());
|
||||
country.setLabel(c.getClassname());
|
||||
Optional
|
||||
.ofNullable(c.getDataInfo())
|
||||
.ifPresent(
|
||||
provenance -> country
|
||||
.setProvenance(
|
||||
Provenance
|
||||
.newInstance(
|
||||
provenance
|
||||
.getProvenanceaction()
|
||||
.getClassname(),
|
||||
c.getDataInfo().getTrust())));
|
||||
return country;
|
||||
})
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList())));
|
||||
|
||||
// out.setCountry(countryList);
|
||||
|
||||
final List<String> coverageList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getCoverage())
|
||||
.ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
|
||||
out.setCoverage(coverageList);
|
||||
|
||||
out.setDateofcollection(input.getDateofcollection());
|
||||
|
||||
final List<String> descriptionList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getDescription())
|
||||
.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
|
||||
out.setDescription(descriptionList);
|
||||
Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
|
||||
if (oStr.isPresent()) {
|
||||
out.setEmbargoenddate(oStr.get().getValue());
|
||||
}
|
||||
|
||||
final List<String> formatList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getFormat())
|
||||
.ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
|
||||
out.setFormat(formatList);
|
||||
out.setId(input.getId());
|
||||
out.setOriginalId(input.getOriginalId());
|
||||
|
||||
final List<Instance> instanceList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getInstance())
|
||||
.ifPresent(
|
||||
inst -> inst
|
||||
.stream()
|
||||
.forEach(i -> instanceList.add(getInstance(i, graph))));
|
||||
out
|
||||
.setInstance(instanceList);
|
||||
|
||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
|
||||
if (oL.isPresent()) {
|
||||
eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
|
||||
out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname()));
|
||||
}
|
||||
Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
|
||||
if (oLong.isPresent()) {
|
||||
out.setLastupdatetimestamp(oLong.get());
|
||||
}
|
||||
Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
|
||||
if (otitle.isPresent()) {
|
||||
List<StructuredProperty> iTitle = otitle
|
||||
.get()
|
||||
.stream()
|
||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
||||
.collect(Collectors.toList());
|
||||
if (iTitle.size() > 0) {
|
||||
out.setMaintitle(iTitle.get(0).getValue());
|
||||
}
|
||||
|
||||
iTitle = otitle
|
||||
.get()
|
||||
.stream()
|
||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
||||
.collect(Collectors.toList());
|
||||
if (iTitle.size() > 0) {
|
||||
out.setSubtitle(iTitle.get(0).getValue());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
List<ControlledField> pids = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getPid())
|
||||
.ifPresent(
|
||||
value -> value
|
||||
.stream()
|
||||
.forEach(
|
||||
p -> pids
|
||||
.add(
|
||||
ControlledField
|
||||
.newInstance(p.getQualifier().getClassid(), p.getValue()))));
|
||||
out.setPid(pids);
|
||||
oStr = Optional.ofNullable(input.getDateofacceptance());
|
||||
if (oStr.isPresent()) {
|
||||
out.setPublicationdate(oStr.get().getValue());
|
||||
}
|
||||
oStr = Optional.ofNullable(input.getPublisher());
|
||||
if (oStr.isPresent()) {
|
||||
out.setPublisher(oStr.get().getValue());
|
||||
}
|
||||
|
||||
List<String> sourceList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getSource())
|
||||
.ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
|
||||
// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
|
||||
List<Subject> subjectList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getSubject())
|
||||
.ifPresent(
|
||||
value -> value
|
||||
.forEach(s -> subjectList.add(getSubject(s))));
|
||||
|
||||
out.setSubjects(subjectList);
|
||||
|
||||
out.setType(input.getResulttype().getClassid());
|
||||
}
|
||||
|
||||
if (!graph) {
|
||||
((CommunityResult) out)
|
||||
.setCollectedfrom(
|
||||
input
|
||||
.getCollectedfrom()
|
||||
.stream()
|
||||
.map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
Set<String> communities = communityMap.keySet();
|
||||
List<Context> contextList = Optional
|
||||
.ofNullable(
|
||||
input
|
||||
.getContext())
|
||||
.map(
|
||||
value -> value
|
||||
.stream()
|
||||
.map(c -> {
|
||||
String community_id = c.getId();
|
||||
if (community_id.indexOf("::") > 0) {
|
||||
community_id = community_id.substring(0, community_id.indexOf("::"));
|
||||
}
|
||||
if (communities.contains(community_id)) {
|
||||
Context context = new Context();
|
||||
context.setCode(community_id);
|
||||
context.setLabel(communityMap.get(community_id));
|
||||
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
|
||||
if (dataInfo.isPresent()) {
|
||||
List<Provenance> provenance = new ArrayList<>();
|
||||
provenance
|
||||
.addAll(
|
||||
dataInfo
|
||||
.get()
|
||||
.stream()
|
||||
.map(
|
||||
di -> Optional
|
||||
.ofNullable(di.getProvenanceaction())
|
||||
.map(
|
||||
provenanceaction -> Provenance
|
||||
.newInstance(
|
||||
provenanceaction.getClassname(), di.getTrust()))
|
||||
.orElse(null))
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toSet()));
|
||||
|
||||
context.setProvenance(getUniqueProvenance(provenance));
|
||||
}
|
||||
return context;
|
||||
}
|
||||
return null;
|
||||
})
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(new ArrayList<>());
|
||||
|
||||
if (contextList.size() > 0) {
|
||||
Set<Integer> hashValue = new HashSet<>();
|
||||
List<Context> remainigContext = new ArrayList<>();
|
||||
contextList.forEach(c -> {
|
||||
if (!hashValue.contains(c.hashCode())) {
|
||||
remainigContext.add(c);
|
||||
hashValue.add(c.hashCode());
|
||||
}
|
||||
});
|
||||
((CommunityResult) out).setContext(remainigContext);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
|
||||
}
|
||||
|
||||
private static Instance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i, boolean graph) {
|
||||
|
||||
Instance instance = new Instance();
|
||||
|
||||
if(!graph){
|
||||
instance
|
||||
.setCollectedfrom(
|
||||
KeyValue
|
||||
.newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));
|
||||
instance
|
||||
.setHostedby(
|
||||
KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));
|
||||
}
|
||||
|
||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> opAr = Optional
|
||||
.ofNullable(i.getAccessright());
|
||||
if (opAr.isPresent()) {
|
||||
if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
|
||||
String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
|
||||
instance
|
||||
.setAccessright(
|
||||
AccessRight
|
||||
.newInstance(
|
||||
code,
|
||||
Constants.coarCodeLabelMap.get(code),
|
||||
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Optional
|
||||
.ofNullable(i.getLicense())
|
||||
.ifPresent(value -> instance.setLicense(value.getValue()));
|
||||
Optional
|
||||
.ofNullable(i.getDateofacceptance())
|
||||
.ifPresent(value -> instance.setPublicationdate(value.getValue()));
|
||||
Optional
|
||||
.ofNullable(i.getRefereed())
|
||||
.ifPresent(value -> instance.setRefereed(value.getClassname()));
|
||||
// .ifPresent(value -> instance.setRefereed(value.getValue()));
|
||||
Optional
|
||||
.ofNullable(i.getInstancetype())
|
||||
.ifPresent(value -> instance.setType(value.getClassname()));
|
||||
Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));
|
||||
|
||||
return instance;
|
||||
}
|
||||
|
||||
private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) {
|
||||
Provenance iProv = new Provenance();
|
||||
// iProv.setProvenance(Constants.INFERRED);
|
||||
|
||||
Provenance hProv = new Provenance();
|
||||
// hProv.setProvenance(Constants.HARVESTED);
|
||||
Provenance lProv = new Provenance();
|
||||
|
||||
for (Provenance p : provenance) {
|
||||
switch (p.getProvenance()) {
|
||||
case Constants.HARVESTED:
|
||||
hProv = getHighestTrust(hProv, p);
|
||||
break;
|
||||
case Constants.INFERRED:
|
||||
iProv = getHighestTrust(iProv, p);
|
||||
// To be removed as soon as the new beta run has been done
|
||||
// this fixex issue of not set trust during bulktagging
|
||||
if (StringUtils.isEmpty(iProv.getTrust())) {
|
||||
iProv.setTrust(Constants.DEFAULT_TRUST);
|
||||
}
|
||||
break;
|
||||
case Constants.USER_CLAIM:
|
||||
lProv = getHighestTrust(lProv, p);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return Arrays
|
||||
.asList(iProv, hProv, lProv)
|
||||
.stream()
|
||||
.filter(p -> !StringUtils.isEmpty(p.getProvenance()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
}
|
||||
|
||||
private static Provenance getHighestTrust(Provenance hProv, Provenance p) {
|
||||
if (StringUtils.isNoneEmpty(hProv.getTrust(), p.getTrust()))
|
||||
return hProv.getTrust().compareTo(p.getTrust()) > 0 ? hProv : p;
|
||||
|
||||
return (StringUtils.isEmpty(p.getTrust()) && !StringUtils.isEmpty(hProv.getTrust())) ? hProv : p;
|
||||
|
||||
}
|
||||
|
||||
private static Subject getSubject(StructuredProperty s) {
|
||||
Subject subject = new Subject();
|
||||
subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
|
||||
Optional<DataInfo> di = Optional.ofNullable(s.getDataInfo());
|
||||
if (di.isPresent()) {
|
||||
Provenance p = new Provenance();
|
||||
p.setProvenance(di.get().getProvenanceaction().getClassname());
|
||||
p.setTrust(di.get().getTrust());
|
||||
subject.setProvenance(p);
|
||||
}
|
||||
|
||||
return subject;
|
||||
}
|
||||
|
||||
private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
|
||||
Author a = new Author();
|
||||
a.setFullname(oa.getFullname());
|
||||
a.setName(oa.getName());
|
||||
a.setSurname(oa.getSurname());
|
||||
a.setRank(oa.getRank());
|
||||
|
||||
Optional<List<StructuredProperty>> oPids = Optional
|
||||
.ofNullable(oa.getPid());
|
||||
if (oPids.isPresent()) {
|
||||
Pid pid = getOrcid(oPids.get());
|
||||
if (pid != null) {
|
||||
a.setPid(pid);
|
||||
}
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
private static Pid getOrcid(List<StructuredProperty> p) {
|
||||
for (StructuredProperty pid : p) {
|
||||
if (pid.getQualifier().getClassid().equals(Constants.ORCID)) {
|
||||
Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo());
|
||||
if (di.isPresent()) {
|
||||
return Pid
|
||||
.newInstance(
|
||||
ControlledField
|
||||
.newInstance(
|
||||
pid.getQualifier().getClassid(),
|
||||
pid.getValue()),
|
||||
Provenance
|
||||
.newInstance(
|
||||
di.get().getProvenanceaction().getClassname(),
|
||||
di.get().getTrust()));
|
||||
} else {
|
||||
return Pid
|
||||
.newInstance(
|
||||
ControlledField
|
||||
.newInstance(
|
||||
pid.getQualifier().getClassid(),
|
||||
pid.getValue())
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Serializable;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
|
||||
/**
|
||||
* This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
|
||||
* context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community
|
||||
* - research infrastructure/initiative , the value is the label of the research community - research
|
||||
* infrastructure/initiative.
|
||||
*/
|
||||
|
||||
public class SaveCommunityMap implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
|
||||
private final QueryInformationSystem queryInformationSystem;
|
||||
|
||||
private final Configuration conf;
|
||||
private final BufferedWriter writer;
|
||||
|
||||
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
|
||||
conf = new Configuration();
|
||||
conf.set("fs.defaultFS", hdfsNameNode);
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
Path hdfsWritePath = new Path(hdfsPath);
|
||||
FSDataOutputStream fsDataOutputStream = null;
|
||||
if (fileSystem.exists(hdfsWritePath)) {
|
||||
fileSystem.delete(hdfsWritePath);
|
||||
}
|
||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||
|
||||
queryInformationSystem = new QueryInformationSystem();
|
||||
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
||||
|
||||
writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
||||
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SaveCommunityMap.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String nameNode = parser.get("nameNode");
|
||||
log.info("nameNode: {}", nameNode);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
||||
log.info("isLookUpUrl: {}", isLookUpUrl);
|
||||
|
||||
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
|
||||
|
||||
scm.saveCommunityMap();
|
||||
|
||||
}
|
||||
|
||||
private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException {
|
||||
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap()));
|
||||
writer.close();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.*;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
|
||||
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||
|
||||
public class SendToZenodoHDFS implements Serializable {
|
||||
|
||||
private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class);
|
||||
|
||||
public static void main(final String[] args) throws Exception, MissingConceptDoiException {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
SendToZenodoHDFS.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json")));
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String hdfsPath = parser.get("hdfsPath");
|
||||
final String hdfsNameNode = parser.get("nameNode");
|
||||
final String access_token = parser.get("accessToken");
|
||||
final String connection_url = parser.get("connectionUrl");
|
||||
final String metadata = parser.get("metadata");
|
||||
final Boolean newDeposition = Boolean.valueOf(parser.get("newDeposition"));
|
||||
final String concept_rec_id = Optional
|
||||
.ofNullable(parser.get("conceptRecordId"))
|
||||
.orElse(null);
|
||||
final String communityMapPath = parser.get("communityMapPath");
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
conf.set("fs.defaultFS", hdfsNameNode);
|
||||
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
|
||||
CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath);
|
||||
|
||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||
.listFiles(
|
||||
new Path(hdfsPath), true);
|
||||
ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connection_url, access_token);
|
||||
if (newDeposition) {
|
||||
zenodoApiClient.newDeposition();
|
||||
} else {
|
||||
if (concept_rec_id == null) {
|
||||
throw new MissingConceptDoiException("No concept record id has been provided");
|
||||
}
|
||||
zenodoApiClient.newVersion(concept_rec_id);
|
||||
}
|
||||
|
||||
while (fileStatusListIterator.hasNext()) {
|
||||
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
||||
|
||||
Path p = fileStatus.getPath();
|
||||
String p_string = p.toString();
|
||||
if (!p_string.endsWith("_SUCCESS")) {
|
||||
// String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
|
||||
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||
log.info("Sending information for community: " + name);
|
||||
if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) {
|
||||
name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar";
|
||||
}
|
||||
|
||||
FSDataInputStream inputStream = fileSystem.open(p);
|
||||
zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
zenodoApiClient.sendMretadata(metadata);
|
||||
zenodoApiClient.publish();
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.graph.Constants;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
public class Utils {
|
||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static void removeOutputDir(SparkSession spark, String path) {
|
||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||
}
|
||||
|
||||
public static <R> Dataset<R> readPath(
|
||||
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||
return spark
|
||||
.read()
|
||||
.textFile(inputPath)
|
||||
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||
}
|
||||
|
||||
public static ISLookUpService getIsLookUpService(String isLookUpUrl) {
|
||||
return ISLookupClientFactory.getLookUpService(isLookUpUrl);
|
||||
}
|
||||
|
||||
public static String getContextId(String id) {
|
||||
|
||||
return String
|
||||
.format(
|
||||
"%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
|
||||
DHPUtils.md5(id));
|
||||
}
|
||||
|
||||
public static CommunityMap getCommunityMap(SparkSession spark, String communityMapPath) {
|
||||
|
||||
return new Gson().fromJson(spark.read().textFile(communityMapPath).collectAsList().get(0), CommunityMap.class);
|
||||
|
||||
}
|
||||
|
||||
public static CommunityMap readCommunityMap(FileSystem fileSystem, String communityMapPath) throws IOException {
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(communityMapPath))));
|
||||
StringBuffer sb = new StringBuffer();
|
||||
try {
|
||||
String line;
|
||||
while ((line = br.readLine()) != null) {
|
||||
sb.append(line);
|
||||
}
|
||||
} finally {
|
||||
br.close();
|
||||
|
||||
}
|
||||
|
||||
return new Gson().fromJson(sb.toString(), CommunityMap.class);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
 * String-to-String map describing the communities, deserialized from JSON by the dump jobs. The keys are the
 * community identifiers used to split the dumped results. NOTE(review): the values are presumably the community
 * labels - confirm against the job producing the map.
 */
public class CommunityMap extends HashMap<String, String> implements Serializable {
}
|
|
@ -0,0 +1,83 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||
|
||||
/**
|
||||
* This class splits the dumped results according to the research community - research initiative/infrastructure they
|
||||
* are related to. The information about the community is found in the element "context.id" in the result. Since the
|
||||
* context that can be found in the result can be associated not only to communities, a community Map is provided. It
|
||||
* will guide the splitting process. Note: the repartition(1) just before writing the results related to a community.
|
||||
* This is a choice due to uploading constraints (just one file for each community) As soon as a better solution will be
|
||||
* in place remove the repartition
|
||||
*/
|
||||
public class CommunitySplit implements Serializable {
|
||||
|
||||
public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath) {
|
||||
SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
execSplit(spark, inputPath, outputPath, Utils.getCommunityMap(spark, communityMapPath).keySet());
|
||||
});
|
||||
}
|
||||
|
||||
private static void execSplit(SparkSession spark, String inputPath, String outputPath,
|
||||
Set<String> communities) {
|
||||
|
||||
Dataset<CommunityResult> result = Utils
|
||||
.readPath(spark, inputPath + "/publication", CommunityResult.class)
|
||||
.union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
|
||||
.union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class))
|
||||
.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
|
||||
|
||||
communities
|
||||
.stream()
|
||||
.forEach(c -> printResult(c, result, outputPath));
|
||||
|
||||
}
|
||||
|
||||
private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
|
||||
Dataset<CommunityResult> community_products = result
|
||||
.filter(r -> containsCommunity(r, c));
|
||||
|
||||
try {
|
||||
community_products.first();
|
||||
community_products
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Overwrite)
|
||||
.json(outputPath + "/" + c);
|
||||
} catch (Exception e) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static boolean containsCommunity(CommunityResult r, String c) {
|
||||
if (Optional.ofNullable(r.getContext()).isPresent()) {
|
||||
return r
|
||||
.getContext()
|
||||
.stream()
|
||||
.filter(con -> con.getCode().equals(c))
|
||||
.collect(Collectors.toList())
|
||||
.size() > 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||
|
||||
public class ResultProject implements Serializable {
|
||||
private String resultId;
|
||||
private List<Project> projectsList;
|
||||
|
||||
public String getResultId() {
|
||||
return resultId;
|
||||
}
|
||||
|
||||
public void setResultId(String resultId) {
|
||||
this.resultId = resultId;
|
||||
}
|
||||
|
||||
public List<Project> getProjectsList() {
|
||||
return projectsList;
|
||||
}
|
||||
|
||||
public void setProjectsList(List<Project> projectsList) {
|
||||
this.projectsList = projectsList;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
/**
|
||||
* Spark action to trigger the dump of results associated to research community - reseach initiative/infrasctructure The
|
||||
* actual dump if performed via the class DumpProducts that is used also for the entire graph dump
|
||||
*/
|
||||
public class SparkDumpCommunityProducts implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkDumpCommunityProducts.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
String communityMapPath = parser.get("communityMapPath");
|
||||
|
||||
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||
|
||||
DumpProducts dump = new DumpProducts();
|
||||
|
||||
dump
|
||||
.run(
|
||||
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class,
|
||||
false);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,185 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.io.StringReader;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Preparation of the Project information to be added to the dumped results. For each result associated to at least one
|
||||
* Project, a serialization of an instance af ResultProject closs is done. ResultProject contains the resultId, and the
|
||||
* list of Projects (as in eu.dnetlib.dhp.schema.dump.oaf.community.Project) it is associated to
|
||||
*/
|
||||
public class SparkPrepareResultProject implements Serializable {
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkPrepareResultProject.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkPrepareResultProject.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
prepareResultProjectList(spark, inputPath, outputPath);
|
||||
});
|
||||
}
|
||||
|
||||
private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) {
|
||||
Dataset<Relation> relation = Utils
|
||||
.readPath(spark, inputPath + "/relation", Relation.class)
|
||||
.filter("dataInfo.deletedbyinference = false and relClass = 'produces'");
|
||||
Dataset<eu.dnetlib.dhp.schema.oaf.Project> projects = Utils
|
||||
.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);
|
||||
|
||||
projects
|
||||
.joinWith(relation, projects.col("id").equalTo(relation.col("source")))
|
||||
.groupByKey(
|
||||
(MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, String>) value -> value
|
||||
._2()
|
||||
.getTarget(),
|
||||
Encoders.STRING())
|
||||
.mapGroups(
|
||||
(MapGroupsFunction<String, Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, ResultProject>) (s,
|
||||
it) -> {
|
||||
Set<String> projectSet = new HashSet<>();
|
||||
Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation> first = it.next();
|
||||
ResultProject rp = new ResultProject();
|
||||
rp.setResultId(first._2().getTarget());
|
||||
eu.dnetlib.dhp.schema.oaf.Project p = first._1();
|
||||
projectSet.add(p.getId());
|
||||
Project ps = getProject(p);
|
||||
|
||||
List<Project> projList = new ArrayList<>();
|
||||
projList.add(ps);
|
||||
rp.setProjectsList(projList);
|
||||
it.forEachRemaining(c -> {
|
||||
eu.dnetlib.dhp.schema.oaf.Project op = c._1();
|
||||
if (!projectSet.contains(op.getId())) {
|
||||
projList
|
||||
.add(getProject(op));
|
||||
|
||||
projectSet.add(op.getId());
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
return rp;
|
||||
}, Encoders.bean(ResultProject.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
}
|
||||
|
||||
private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op) {
|
||||
Project p = Project
|
||||
.newInstance(
|
||||
op.getId(),
|
||||
op.getCode().getValue(),
|
||||
Optional
|
||||
.ofNullable(op.getAcronym())
|
||||
.map(a -> a.getValue())
|
||||
.orElse(null),
|
||||
Optional
|
||||
.ofNullable(op.getTitle())
|
||||
.map(v -> v.getValue())
|
||||
.orElse(null),
|
||||
Optional
|
||||
.ofNullable(op.getFundingtree())
|
||||
.map(
|
||||
value -> value
|
||||
.stream()
|
||||
.map(ft -> getFunder(ft.getValue()))
|
||||
.collect(Collectors.toList())
|
||||
.get(0))
|
||||
.orElse(null));
|
||||
|
||||
Optional<DataInfo> di = Optional.ofNullable(op.getDataInfo());
|
||||
Provenance provenance = new Provenance();
|
||||
if (di.isPresent()) {
|
||||
provenance.setProvenance(di.get().getProvenanceaction().getClassname());
|
||||
provenance.setTrust(di.get().getTrust());
|
||||
p.setProvenance(provenance);
|
||||
}
|
||||
|
||||
return p;
|
||||
|
||||
}
|
||||
|
||||
private static Funder getFunder(String fundingtree) {
|
||||
// ["<fundingtree><funder><id>nsf_________::NSF</id><shortname>NSF</shortname><name>National Science
|
||||
// Foundation</name><jurisdiction>US</jurisdiction></funder><funding_level_1><id>nsf_________::NSF::CISE/OAD::CISE/CCF</id><description>Division
|
||||
// of Computing and Communication Foundations</description><name>Division of Computing and Communication
|
||||
// Foundations</name><parent><funding_level_0><id>nsf_________::NSF::CISE/OAD</id><description>Directorate for
|
||||
// Computer & Information Science & Engineering</description><name>Directorate for Computer &
|
||||
// Information Science &
|
||||
// Engineering</name><parent/><class>nsf:fundingStream</class></funding_level_0></parent></funding_level_1></fundingtree>"]
|
||||
Funder f = new Funder();
|
||||
final Document doc;
|
||||
try {
|
||||
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||
f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
||||
f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
|
||||
f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||
for (Object o : doc.selectNodes("//funding_level_0")) {
|
||||
List node = ((Node) o).selectNodes("./name");
|
||||
f.setFundingStream(((Node) node.get(0)).getText());
|
||||
|
||||
}
|
||||
|
||||
return f;
|
||||
} catch (DocumentException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return f;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
/**
|
||||
* Spark job to trigger the split of results associated to research community - reseach initiative/infrasctructure. The
|
||||
* actual split is performed by the class CommunitySplit
|
||||
*/
|
||||
public class SparkSplitForCommunity implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkSplitForCommunity.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkSplitForCommunity.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String communityMapPath = parser.get("communityMapPath");
|
||||
|
||||
CommunitySplit split = new CommunitySplit();
|
||||
split.run(isSparkSessionManaged, inputPath, outputPath, communityMapPath);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
||||
|
||||
public class SparkUpdateProjectInfo implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkUpdateProjectInfo.class);
|
||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkUpdateProjectInfo.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String preparedInfoPath = parser.get("preparedInfoPath");
|
||||
log.info("preparedInfoPath: {}", preparedInfoPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
extend(spark, inputPath, outputPath, preparedInfoPath);// , inputClazz);
|
||||
});
|
||||
}
|
||||
|
||||
private static void extend(
|
||||
SparkSession spark,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
String preparedInfoPath) {
|
||||
Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
|
||||
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
|
||||
result
|
||||
.joinWith(
|
||||
resultProject, result.col("id").equalTo(resultProject.col("resultId")),
|
||||
"left")
|
||||
.map(value -> {
|
||||
CommunityResult r = value._1();
|
||||
Optional.ofNullable(value._2()).ifPresent(rp -> {
|
||||
r.setProjects(rp.getProjectsList());
|
||||
});
|
||||
return r;
|
||||
}, Encoders.bean(CommunityResult.class))
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Append)
|
||||
.json(outputPath);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * String constants used by the dump of the graph: relation names, entity type names and the parts composing the
 * identifier of a context entity.
 */
public class Constants implements Serializable {

	// relation names between a result/datasource pair
	public static final String IS_HOSTED_BY = "isHostedBy";
	public static final String HOSTS = "hosts";

	// relation names between a project/funder pair
	public static final String IS_FUNDED_BY = "isFundedBy";
	public static final String FUNDS = "funds";

	public static final String FUNDINGS = "fundings";

	// entity type names
	public static final String RESULT_ENTITY = "result";
	public static final String DATASOURCE_ENTITY = "datasource";
	public static final String CONTEXT_ENTITY = "context";
	public static final String ORGANIZATION_ENTITY = "organization";
	public static final String PROJECT_ENTITY = "project";

	// prefix and namespace prefix of the identifiers generated for context entities
	public static final String CONTEXT_ID = "00";
	public static final String CONTEXT_NS_PREFIX = "context_____";

	// public static final String FUNDER_DS = "entityregistry::projects";
}
|
|
@ -0,0 +1,84 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Holder of the information extracted from a context that is needed to create the Context Entities and the relations
 * between context entities and datasources and projects.
 */
public class ContextInfo implements Serializable {

	private String id;
	private String description;
	private String type;
	private String zenodocommunity;
	private String name;
	private List<String> projectList;
	private List<String> datasourceList;
	private List<String> subject;

	/** @return the identifier of the context */
	public String getId() {
		return this.id;
	}

	/** @param id the identifier of the context */
	public void setId(final String id) {
		this.id = id;
	}

	/** @return the description of the context */
	public String getDescription() {
		return this.description;
	}

	/** @param description the description of the context */
	public void setDescription(final String description) {
		this.description = description;
	}

	/** @return the type of the context */
	public String getType() {
		return this.type;
	}

	/** @param type the type of the context */
	public void setType(final String type) {
		this.type = type;
	}

	/** @return the zenodo community of the context */
	public String getZenodocommunity() {
		return this.zenodocommunity;
	}

	/** @param zenodocommunity the zenodo community of the context */
	public void setZenodocommunity(final String zenodocommunity) {
		this.zenodocommunity = zenodocommunity;
	}

	/** @return the name of the context */
	public String getName() {
		return this.name;
	}

	/** @param name the name of the context */
	public void setName(final String name) {
		this.name = name;
	}

	/** @return the projects listed for the context */
	public List<String> getProjectList() {
		return this.projectList;
	}

	/** @param projectList the projects listed for the context */
	public void setProjectList(final List<String> projectList) {
		this.projectList = projectList;
	}

	/** @return the datasources listed for the context */
	public List<String> getDatasourceList() {
		return this.datasourceList;
	}

	/** @param datasourceList the datasources listed for the context */
	public void setDatasourceList(final List<String> datasourceList) {
		this.datasourceList = datasourceList;
	}

	/** @return the subjects of the context */
	public List<String> getSubject() {
		return this.subject;
	}

	/** @param subject the subjects of the context */
	public void setSubject(final List<String> subject) {
		this.subject = subject;
	}
}
|
|
@ -0,0 +1,105 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Serializable;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
|
||||
|
||||
/**
|
||||
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
|
||||
* collects the general information for contexes of type community or ri. The general information is the id of the
|
||||
* context, its label, the subjects associated to the context, its zenodo community, description and type. This
|
||||
* information is used to create a new Context Entity
|
||||
*/
|
||||
public class CreateContextEntities implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class);
|
||||
private final Configuration conf;
|
||||
private final BufferedWriter writer;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
CreateContextEntities.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String hdfsPath = parser.get("hdfsPath");
|
||||
log.info("hdfsPath: {}", hdfsPath);
|
||||
|
||||
final String hdfsNameNode = parser.get("nameNode");
|
||||
log.info("nameNode: {}", hdfsNameNode);
|
||||
|
||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
||||
log.info("isLookUpUrl: {}", isLookUpUrl);
|
||||
|
||||
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
|
||||
|
||||
log.info("Processing contexts...");
|
||||
cce.execute(Process::getEntity, isLookUpUrl);
|
||||
|
||||
cce.close();
|
||||
|
||||
}
|
||||
|
||||
private void close() throws IOException {
|
||||
writer.close();
|
||||
}
|
||||
|
||||
public CreateContextEntities(String hdfsPath, String hdfsNameNode) throws IOException {
|
||||
this.conf = new Configuration();
|
||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
||||
Path hdfsWritePath = new Path(hdfsPath);
|
||||
FSDataOutputStream fsDataOutputStream = null;
|
||||
if (fileSystem.exists(hdfsWritePath)) {
|
||||
fsDataOutputStream = fileSystem.append(hdfsWritePath);
|
||||
} else {
|
||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||
}
|
||||
|
||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
||||
|
||||
}
|
||||
|
||||
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer, String isLookUpUrl)
|
||||
throws Exception {
|
||||
|
||||
QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
|
||||
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
||||
|
||||
final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));
|
||||
|
||||
queryInformationSystem.getContextInformation(consumer);
|
||||
}
|
||||
|
||||
protected <R extends ResearchInitiative> void writeEntity(final R r) {
|
||||
try {
|
||||
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
|
||||
// log.info("writing context : {}", new Gson().toJson(r));
|
||||
writer.newLine();
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,124 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Serializable;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
|
||||
/**
|
||||
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
|
||||
* and the project is not created because of a low coverage in the profiles of openaire ids related to projects
|
||||
*/
|
||||
public class CreateContextRelation implements Serializable {
|
||||
private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class);
|
||||
private final Configuration conf;
|
||||
private final BufferedWriter writer;
|
||||
private final QueryInformationSystem queryInformationSystem;
|
||||
|
||||
private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
|
||||
private static final String CONTEX_RELATION_PROJECT = "projects";
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
CreateContextRelation.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String hdfsPath = parser.get("hdfsPath");
|
||||
log.info("hdfsPath: {}", hdfsPath);
|
||||
|
||||
final String hdfsNameNode = parser.get("nameNode");
|
||||
log.info("nameNode: {}", hdfsNameNode);
|
||||
|
||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
||||
log.info("isLookUpUrl: {}", isLookUpUrl);
|
||||
|
||||
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);
|
||||
|
||||
log.info("Creating relation for datasource...");
|
||||
cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class));
|
||||
|
||||
log.info("Creating relations for projects... ");
|
||||
// cce
|
||||
// .execute(
|
||||
// Process::getRelation, CONTEX_RELATION_PROJECT,
|
||||
// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
|
||||
|
||||
cce.close();
|
||||
|
||||
}
|
||||
|
||||
private void close() throws IOException {
|
||||
writer.close();
|
||||
}
|
||||
|
||||
public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
|
||||
throws IOException, ISLookUpException {
|
||||
this.conf = new Configuration();
|
||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
||||
|
||||
queryInformationSystem = new QueryInformationSystem();
|
||||
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
||||
queryInformationSystem.execContextRelationQuery();
|
||||
|
||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
||||
Path hdfsWritePath = new Path(hdfsPath);
|
||||
FSDataOutputStream fsDataOutputStream = null;
|
||||
if (fileSystem.exists(hdfsWritePath)) {
|
||||
fsDataOutputStream = fileSystem.append(hdfsWritePath);
|
||||
} else {
|
||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||
}
|
||||
|
||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
||||
|
||||
}
|
||||
|
||||
public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix) {
|
||||
|
||||
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(c -> writeEntity(c));
|
||||
|
||||
queryInformationSystem.getContextRelation(consumer, category, prefix);
|
||||
}
|
||||
|
||||
protected void writeEntity(final Relation r) {
|
||||
try {
|
||||
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
|
||||
writer.newLine();
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,496 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.io.StringReader;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
|
||||
/**
|
||||
* Dumps of entities in the model defined in eu.dnetlib.dhp.schema.dump.oaf.graph. Results are dumped using the same
|
||||
* Mapper as for eu.dnetlib.dhp.schema.dump.oaf.community, while for the other entities the mapping is defined below
|
||||
*/
|
||||
public class DumpGraphEntities implements Serializable {
|
||||
|
||||
public void run(Boolean isSparkSessionManaged,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
Class<? extends OafEntity> inputClazz,
|
||||
String communityMapPath) {
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
switch (ModelSupport.idPrefixMap.get(inputClazz)) {
|
||||
case "50":
|
||||
DumpProducts d = new DumpProducts();
|
||||
d
|
||||
.run(
|
||||
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, Result.class,
|
||||
true);
|
||||
break;
|
||||
case "40":
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
projectMap(spark, inputPath, outputPath, inputClazz);
|
||||
|
||||
});
|
||||
break;
|
||||
case "20":
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
organizationMap(spark, inputPath, outputPath, inputClazz);
|
||||
|
||||
});
|
||||
break;
|
||||
case "10":
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
datasourceMap(spark, inputPath, outputPath, inputClazz);
|
||||
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
|
||||
Class<E> inputClazz) {
|
||||
Utils
|
||||
.readPath(spark, inputPath, inputClazz)
|
||||
.map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
|
||||
.filter(Objects::nonNull)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
}
|
||||
|
||||
private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
|
||||
Class<E> inputClazz) {
|
||||
Utils
|
||||
.readPath(spark, inputPath, inputClazz)
|
||||
.map(p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p), Encoders.bean(Project.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
}
|
||||
|
||||
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
|
||||
Datasource datasource = new Datasource();
|
||||
|
||||
datasource.setId(d.getId());
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getOriginalId())
|
||||
.ifPresent(
|
||||
oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getPid())
|
||||
.ifPresent(
|
||||
pids -> pids
|
||||
.stream()
|
||||
.map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getDatasourcetype())
|
||||
.ifPresent(
|
||||
dsType -> datasource
|
||||
.setDatasourcetype(ControlledField.newInstance(dsType.getClassid(), dsType.getClassname())));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getOpenairecompatibility())
|
||||
.ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getOfficialname())
|
||||
.ifPresent(oname -> datasource.setOfficialname(oname.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getEnglishname())
|
||||
.ifPresent(ename -> datasource.setEnglishname(ename.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getWebsiteurl())
|
||||
.ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getLogourl())
|
||||
.ifPresent(lurl -> datasource.setLogourl(lurl.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getDateofvalidation())
|
||||
.ifPresent(dval -> datasource.setDateofvalidation(dval.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getDescription())
|
||||
.ifPresent(dex -> datasource.setDescription(dex.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getSubjects())
|
||||
.ifPresent(
|
||||
sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList())));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getOdpolicies())
|
||||
.ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue())));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getOdlanguages())
|
||||
.ifPresent(
|
||||
langs -> datasource
|
||||
.setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList())));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getOdcontenttypes())
|
||||
.ifPresent(
|
||||
ctypes -> datasource
|
||||
.setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList())));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getReleasestartdate())
|
||||
.ifPresent(rd -> datasource.setReleasestartdate(rd.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getReleaseenddate())
|
||||
.ifPresent(ed -> datasource.setReleaseenddate(ed.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getMissionstatementurl())
|
||||
.ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getDatabaseaccesstype())
|
||||
.ifPresent(ar -> datasource.setAccessrights(ar.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getDatauploadtype())
|
||||
.ifPresent(dut -> datasource.setUploadrights(dut.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getDatabaseaccessrestriction())
|
||||
.ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getDatauploadrestriction())
|
||||
.ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getVersioning())
|
||||
.ifPresent(v -> datasource.setVersioning(v.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getCitationguidelineurl())
|
||||
.ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getPidsystems())
|
||||
.ifPresent(ps -> datasource.setPidsystems(ps.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getCertificates())
|
||||
.ifPresent(c -> datasource.setCertificates(c.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getPolicies())
|
||||
.ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList())));
|
||||
|
||||
Optional
|
||||
.ofNullable(d.getJournal())
|
||||
.ifPresent(j -> datasource.setJournal(getContainer(j)));
|
||||
|
||||
return datasource;
|
||||
|
||||
}
|
||||
|
||||
private static Container getContainer(Journal j) {
|
||||
Container c = new Container();
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getName())
|
||||
.ifPresent(n -> c.setName(n));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getIssnPrinted())
|
||||
.ifPresent(issnp -> c.setIssnPrinted(issnp));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getIssnOnline())
|
||||
.ifPresent(issno -> c.setIssnOnline(issno));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getIssnLinking())
|
||||
.ifPresent(isnl -> c.setIssnLinking(isnl));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getEp())
|
||||
.ifPresent(ep -> c.setEp(ep));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getIss())
|
||||
.ifPresent(iss -> c.setIss(iss));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getSp())
|
||||
.ifPresent(sp -> c.setSp(sp));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getVol())
|
||||
.ifPresent(vol -> c.setVol(vol));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getEdition())
|
||||
.ifPresent(edition -> c.setEdition(edition));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getConferencedate())
|
||||
.ifPresent(cdate -> c.setConferencedate(cdate));
|
||||
|
||||
Optional
|
||||
.ofNullable(j.getConferenceplace())
|
||||
.ifPresent(cplace -> c.setConferenceplace(cplace));
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
|
||||
Project project = new Project();
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getId())
|
||||
.ifPresent(id -> project.setId(id));
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getWebsiteurl())
|
||||
.ifPresent(w -> project.setWebsiteurl(w.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getCode())
|
||||
.ifPresent(code -> project.setCode(code.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getAcronym())
|
||||
.ifPresent(acronynim -> project.setAcronym(acronynim.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getTitle())
|
||||
.ifPresent(title -> project.setTitle(title.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getStartdate())
|
||||
.ifPresent(sdate -> project.setStartdate(sdate.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getEnddate())
|
||||
.ifPresent(edate -> project.setEnddate(edate.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getCallidentifier())
|
||||
.ifPresent(cide -> project.setCallidentifier(cide.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getKeywords())
|
||||
.ifPresent(key -> project.setKeywords(key.getValue()));
|
||||
|
||||
Optional<Field<String>> omandate = Optional.ofNullable(p.getOamandatepublications());
|
||||
Optional<Field<String>> oecsc39 = Optional.ofNullable(p.getEcsc39());
|
||||
boolean mandate = false;
|
||||
if (omandate.isPresent()) {
|
||||
if (omandate.get().getValue().equals("true")) {
|
||||
mandate = true;
|
||||
}
|
||||
}
|
||||
if (oecsc39.isPresent()) {
|
||||
if (oecsc39.get().getValue().equals("true")) {
|
||||
mandate = true;
|
||||
}
|
||||
}
|
||||
|
||||
project.setOpenaccessmandateforpublications(mandate);
|
||||
project.setOpenaccessmandatefordataset(false);
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getEcarticle29_3())
|
||||
.ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true")));
|
||||
|
||||
project
|
||||
.setSubject(
|
||||
Optional
|
||||
.ofNullable(p.getSubjects())
|
||||
.map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
|
||||
.orElse(new ArrayList<>()));
|
||||
|
||||
Optional
|
||||
.ofNullable(p.getSummary())
|
||||
.ifPresent(summary -> project.setSummary(summary.getValue()));
|
||||
|
||||
Optional<Float> ofundedamount = Optional.ofNullable(p.getFundedamount());
|
||||
Optional<Field<String>> ocurrency = Optional.ofNullable(p.getCurrency());
|
||||
Optional<Float> ototalcost = Optional.ofNullable(p.getTotalcost());
|
||||
|
||||
if (ocurrency.isPresent()) {
|
||||
if (ofundedamount.isPresent()) {
|
||||
if (ototalcost.isPresent()) {
|
||||
project
|
||||
.setGranted(
|
||||
Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get()));
|
||||
} else {
|
||||
project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
project
|
||||
.setProgramme(
|
||||
Optional
|
||||
.ofNullable(p.getProgramme())
|
||||
.map(
|
||||
programme -> programme
|
||||
.stream()
|
||||
.map(pg -> Programme.newInstance(pg.getCode(), pg.getDescription()))
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(new ArrayList<>()));
|
||||
|
||||
Optional<List<Field<String>>> ofundTree = Optional
|
||||
.ofNullable(p.getFundingtree());
|
||||
List<Funder> funList = new ArrayList<>();
|
||||
if (ofundTree.isPresent()) {
|
||||
for (Field<String> fundingtree : ofundTree.get()) {
|
||||
funList.add(getFunder(fundingtree.getValue()));
|
||||
}
|
||||
}
|
||||
project.setFunding(funList);
|
||||
|
||||
return project;
|
||||
}
|
||||
|
||||
public static Funder getFunder(String fundingtree) throws DocumentException {
|
||||
Funder f = new Funder();
|
||||
final Document doc;
|
||||
|
||||
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||
f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
||||
f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
|
||||
f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||
// f.setId(((org.dom4j.Node) (doc.selectNodes("//funder/id").get(0))).getText());
|
||||
|
||||
String id = "";
|
||||
String description = "";
|
||||
// List<Levels> fundings = new ArrayList<>();
|
||||
int level = 0;
|
||||
List<org.dom4j.Node> nodes = doc.selectNodes("//funding_level_" + level);
|
||||
while (nodes.size() > 0) {
|
||||
for (org.dom4j.Node n : nodes) {
|
||||
|
||||
List node = n.selectNodes("./id");
|
||||
id = ((org.dom4j.Node) node.get(0)).getText();
|
||||
id = id.substring(id.indexOf("::") + 2);
|
||||
|
||||
node = n.selectNodes("./description");
|
||||
description += ((Node) node.get(0)).getText() + " - ";
|
||||
|
||||
}
|
||||
level += 1;
|
||||
nodes = doc.selectNodes("//funding_level_" + level);
|
||||
}
|
||||
|
||||
if (!id.equals("")) {
|
||||
Fundings fundings = new Fundings();
|
||||
fundings.setId(id);
|
||||
fundings.setDescription(description.substring(0, description.length() - 3).trim());
|
||||
f.setFunding_stream(fundings);
|
||||
}
|
||||
|
||||
return f;
|
||||
|
||||
}
|
||||
|
||||
private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
|
||||
Class<E> inputClazz) {
|
||||
Utils
|
||||
.readPath(spark, inputPath, inputClazz)
|
||||
.map(o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), Encoders.bean(Organization.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
}
|
||||
|
||||
private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) {
|
||||
Organization organization = new Organization();
|
||||
|
||||
Optional
|
||||
.ofNullable(org.getLegalshortname())
|
||||
.ifPresent(value -> organization.setLegalshortname(value.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(org.getLegalname())
|
||||
.ifPresent(value -> organization.setLegalname(value.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(org.getWebsiteurl())
|
||||
.ifPresent(value -> organization.setWebsiteurl(value.getValue()));
|
||||
|
||||
Optional
|
||||
.ofNullable(org.getAlternativeNames())
|
||||
.ifPresent(
|
||||
value -> organization
|
||||
.setAlternativenames(
|
||||
value
|
||||
.stream()
|
||||
.map(v -> v.getValue())
|
||||
.collect(Collectors.toList())));
|
||||
|
||||
Optional
|
||||
.ofNullable(org.getCountry())
|
||||
.ifPresent(
|
||||
value -> organization.setCountry(Qualifier.newInstance(value.getClassid(), value.getClassname())));
|
||||
|
||||
Optional
|
||||
.ofNullable(org.getId())
|
||||
.ifPresent(value -> organization.setId(value));
|
||||
|
||||
Optional
|
||||
.ofNullable(org.getPid())
|
||||
.ifPresent(
|
||||
value -> organization
|
||||
.setPid(
|
||||
value
|
||||
.stream()
|
||||
.map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||
.collect(Collectors.toList())));
|
||||
|
||||
return organization;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,197 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
/**
|
||||
* Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity.
|
||||
* The new Relations are created for the datasource in the collectedfrom and hostedby elements and for the context
|
||||
* related to communities and research initiative/infrastructures.
|
||||
*
|
||||
* For collectedfrom elements it creates: datasource -> provides -> result and result -> isProvidedBy -> datasource
|
||||
* For hostedby elements it creates: datasource -> hosts -> result and result -> isHostedBy -> datasource
|
||||
* For context elements it creates: context <-> isRelatedTo <-> result
|
||||
*/
|
||||
public class Extractor implements Serializable {
|
||||
|
||||
public void run(Boolean isSparkSessionManaged,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
Class<? extends Result> inputClazz,
|
||||
String communityMapPath) {
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
extractRelationResult(
|
||||
spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath));
|
||||
});
|
||||
}
|
||||
|
||||
private <R extends Result> void extractRelationResult(SparkSession spark,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
Class<R> inputClazz,
|
||||
CommunityMap communityMap) {
|
||||
|
||||
Set<Integer> hashCodes = new HashSet<>();
|
||||
|
||||
Utils
|
||||
.readPath(spark, inputPath, inputClazz)
|
||||
.flatMap((FlatMapFunction<R, Relation>) value -> {
|
||||
List<Relation> relationList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(value.getInstance())
|
||||
.ifPresent(inst -> inst.forEach(instance -> {
|
||||
Optional
|
||||
.ofNullable(instance.getCollectedfrom())
|
||||
.ifPresent(
|
||||
cf -> getRelatioPair(
|
||||
value, relationList, cf,
|
||||
ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
|
||||
Optional
|
||||
.ofNullable(instance.getHostedby())
|
||||
.ifPresent(
|
||||
hb -> getRelatioPair(
|
||||
value, relationList, hb,
|
||||
Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
|
||||
}));
|
||||
Set<String> communities = communityMap.keySet();
|
||||
Optional
|
||||
.ofNullable(value.getContext())
|
||||
.ifPresent(contexts -> contexts.forEach(context -> {
|
||||
String id = context.getId();
|
||||
if (id.contains(":")) {
|
||||
id = id.substring(0, id.indexOf(":"));
|
||||
}
|
||||
if (communities.contains(id)) {
|
||||
String contextId = Utils.getContextId(id);
|
||||
Provenance provenance = Optional
|
||||
.ofNullable(context.getDataInfo())
|
||||
.map(
|
||||
dinfo -> Optional
|
||||
.ofNullable(dinfo.get(0).getProvenanceaction())
|
||||
.map(
|
||||
paction -> Provenance
|
||||
.newInstance(
|
||||
paction.getClassid(),
|
||||
dinfo.get(0).getTrust()))
|
||||
.orElse(null))
|
||||
.orElse(null);
|
||||
Relation r = getRelation(
|
||||
value.getId(), contextId,
|
||||
Constants.RESULT_ENTITY,
|
||||
Constants.CONTEXT_ENTITY,
|
||||
ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, provenance);
|
||||
if (!hashCodes.contains(r.hashCode())) {
|
||||
relationList
|
||||
.add(r);
|
||||
hashCodes.add(r.hashCode());
|
||||
}
|
||||
r = getRelation(
|
||||
contextId, value.getId(),
|
||||
Constants.CONTEXT_ENTITY,
|
||||
Constants.RESULT_ENTITY,
|
||||
ModelConstants.RELATIONSHIP,
|
||||
ModelConstants.IS_RELATED_TO, provenance);
|
||||
if (!hashCodes.contains(r.hashCode())) {
|
||||
relationList
|
||||
.add(
|
||||
r);
|
||||
hashCodes.add(r.hashCode());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}));
|
||||
|
||||
return relationList.iterator();
|
||||
}, Encoders.bean(Relation.class))
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Overwrite)
|
||||
.json(outputPath);
|
||||
|
||||
}
|
||||
|
||||
private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf,
|
||||
String result_dtasource, String datasource_result,
|
||||
Set<Integer> hashCodes) {
|
||||
Provenance provenance = Optional
|
||||
.ofNullable(cf.getDataInfo())
|
||||
.map(
|
||||
dinfo -> Optional
|
||||
.ofNullable(dinfo.getProvenanceaction())
|
||||
.map(
|
||||
paction -> Provenance
|
||||
.newInstance(
|
||||
paction.getClassid(),
|
||||
dinfo.getTrust()))
|
||||
.orElse(
|
||||
Provenance
|
||||
.newInstance(
|
||||
eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
|
||||
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)))
|
||||
.orElse(
|
||||
Provenance
|
||||
.newInstance(
|
||||
eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
|
||||
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST));
|
||||
Relation r = getRelation(
|
||||
value.getId(),
|
||||
cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
|
||||
result_dtasource, ModelConstants.PROVISION,
|
||||
provenance);
|
||||
if (!hashCodes.contains(r.hashCode())) {
|
||||
relationList
|
||||
.add(r);
|
||||
hashCodes.add(r.hashCode());
|
||||
}
|
||||
|
||||
r = getRelation(
|
||||
cf.getKey(), value.getId(),
|
||||
Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
|
||||
datasource_result, ModelConstants.PROVISION,
|
||||
provenance);
|
||||
|
||||
if (!hashCodes.contains(r.hashCode())) {
|
||||
relationList
|
||||
.add(r);
|
||||
hashCodes.add(r.hashCode());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static Relation getRelation(String source, String target, String sourceType, String targetType,
|
||||
String relName, String relType, Provenance provenance) {
|
||||
Relation r = new Relation();
|
||||
r.setSource(Node.newInstance(source, sourceType));
|
||||
r.setTarget(Node.newInstance(target, targetType));
|
||||
r.setReltype(RelType.newInstance(relName, relType));
|
||||
r.setProvenance(provenance);
|
||||
return r;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Serializable bean pairing an organization identifier with a representative identifier.
 */
public class MergedRels implements Serializable {

    private String organizationId;
    private String representativeId;

    /** @return the organization identifier, null until it is set */
    public String getOrganizationId() {
        return this.organizationId;
    }

    /** @param organizationId the organization identifier to store */
    public void setOrganizationId(final String organizationId) {
        this.organizationId = organizationId;
    }

    /** @return the representative identifier, null until it is set */
    public String getRepresentativeId() {
        return this.representativeId;
    }

    /** @param representativeId the representative identifier to store */
    public void setRepresentativeId(final String representativeId) {
        this.representativeId = representativeId;
    }
}
|
|
@ -0,0 +1,21 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Map from a String key to a list of Strings whose {@link #get(String)} never returns null.
 */
public class OrganizationMap extends HashMap<String, List<String>> {

    public OrganizationMap() {
    }

    /**
     * Returns the list stored under {@code key}, or a new empty list when the key is missing or mapped to null.
     * The empty list is not stored in the map. This is an overload taking a String, not an override of
     * {@code get(Object)}.
     *
     * @param key the key to look up
     * @return the stored list, or a fresh empty list
     */
    public List<String> get(String key) {
        final List<String> stored = super.get(key);
        return stored != null ? stored : new ArrayList<>();
    }
}
|
|
@ -0,0 +1,98 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
|
||||
|
||||
/**
 * Processes the ContextInfo information to produce either a new Context entity or a set of Relations between
 * the generic context entity and the datasources/projects related to the context.
 */
|
||||
public class Process implements Serializable {
|
||||
private static final Logger log = LoggerFactory.getLogger(Process.class);
|
||||
|
||||
public static <R extends ResearchInitiative> R getEntity(ContextInfo ci) {
|
||||
try {
|
||||
ResearchInitiative ri;
|
||||
if (ci.getType().equals("community")) {
|
||||
ri = new ResearchCommunity();
|
||||
((ResearchCommunity) ri).setSubject(ci.getSubject());
|
||||
ri.setType(Constants.RESEARCH_COMMUNITY);
|
||||
} else {
|
||||
ri = new ResearchInitiative();
|
||||
ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
|
||||
}
|
||||
ri.setId(Utils.getContextId(ci.getId()));
|
||||
ri.setOriginalId(ci.getId());
|
||||
|
||||
ri.setDescription(ci.getDescription());
|
||||
ri.setName(ci.getName());
|
||||
ri.setZenodo_community(Constants.ZENODO_COMMUNITY_PREFIX + ci.getZenodocommunity());
|
||||
return (R) ri;
|
||||
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static List<Relation> getRelation(ContextInfo ci) {
|
||||
try {
|
||||
|
||||
List<Relation> relationList = new ArrayList<>();
|
||||
ci
|
||||
.getDatasourceList()
|
||||
.forEach(ds -> {
|
||||
|
||||
String nodeType = ModelSupport.idPrefixEntity.get(ds.substring(0, 2));
|
||||
|
||||
String contextId = Utils.getContextId(ci.getId());
|
||||
relationList
|
||||
.add(
|
||||
Relation
|
||||
.newInstance(
|
||||
Node
|
||||
.newInstance(
|
||||
contextId, eu.dnetlib.dhp.schema.dump.oaf.graph.Constants.CONTEXT_ENTITY),
|
||||
Node.newInstance(ds, nodeType),
|
||||
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||
Provenance
|
||||
.newInstance(
|
||||
Constants.USER_CLAIM,
|
||||
Constants.DEFAULT_TRUST)));
|
||||
|
||||
relationList
|
||||
.add(
|
||||
Relation
|
||||
.newInstance(
|
||||
Node.newInstance(ds, nodeType),
|
||||
Node
|
||||
.newInstance(
|
||||
contextId, eu.dnetlib.dhp.schema.dump.oaf.graph.Constants.CONTEXT_ENTITY),
|
||||
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||
Provenance
|
||||
.newInstance(
|
||||
Constants.USER_CLAIM,
|
||||
Constants.DEFAULT_TRUST)));
|
||||
|
||||
});
|
||||
|
||||
return relationList;
|
||||
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,132 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.*;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
public class QueryInformationSystem {
|
||||
|
||||
private ISLookUpService isLookUp;
|
||||
private List<String> contextRelationResult;
|
||||
|
||||
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
||||
+
|
||||
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
||||
" and $x//context/param[./@name = 'status']/text() = 'all' " +
|
||||
" return " +
|
||||
"$x//context";
|
||||
|
||||
private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
||||
+
|
||||
"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
|
||||
+
|
||||
"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
|
||||
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
|
||||
+
|
||||
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
|
||||
|
||||
public void getContextInformation(final Consumer<ContextInfo> consumer) throws ISLookUpException {
|
||||
|
||||
isLookUp
|
||||
.quickSearchProfile(XQUERY_ENTITY)
|
||||
.forEach(c -> {
|
||||
ContextInfo cinfo = new ContextInfo();
|
||||
String[] cSplit = c.split("@@");
|
||||
cinfo.setId(cSplit[0]);
|
||||
cinfo.setName(cSplit[1]);
|
||||
cinfo.setDescription(cSplit[2]);
|
||||
if (!cSplit[3].trim().equals("")) {
|
||||
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
|
||||
}
|
||||
cinfo.setZenodocommunity(cSplit[4]);
|
||||
cinfo.setType(cSplit[5]);
|
||||
consumer.accept(cinfo);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
public List<String> getContextRelationResult() {
|
||||
return contextRelationResult;
|
||||
}
|
||||
|
||||
public void setContextRelationResult(List<String> contextRelationResult) {
|
||||
this.contextRelationResult = contextRelationResult;
|
||||
}
|
||||
|
||||
public ISLookUpService getIsLookUp() {
|
||||
return isLookUp;
|
||||
}
|
||||
|
||||
public void setIsLookUp(ISLookUpService isLookUpService) {
|
||||
this.isLookUp = isLookUpService;
|
||||
}
|
||||
|
||||
public void execContextRelationQuery() throws ISLookUpException {
|
||||
contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
|
||||
|
||||
}
|
||||
|
||||
public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix) {
|
||||
|
||||
contextRelationResult.forEach(xml -> {
|
||||
ContextInfo cinfo = new ContextInfo();
|
||||
final Document doc;
|
||||
|
||||
try {
|
||||
|
||||
doc = new SAXReader().read(new StringReader(xml));
|
||||
Element root = doc.getRootElement();
|
||||
cinfo.setId(root.attributeValue("id"));
|
||||
|
||||
Iterator it = root.elementIterator();
|
||||
while (it.hasNext()) {
|
||||
Element el = (Element) it.next();
|
||||
if (el.getName().equals("category")) {
|
||||
String categoryId = el.attributeValue("id");
|
||||
categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
|
||||
if (categoryId.equals(category)) {
|
||||
cinfo.setDatasourceList(getCategoryList(el, prefix));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
consumer.accept(cinfo);
|
||||
} catch (DocumentException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@NotNull
|
||||
private List<String> getCategoryList(Element el, String prefix) {
|
||||
List<String> datasourceList = new ArrayList<>();
|
||||
for (Object node : el.selectNodes(".//param")) {
|
||||
Node n = (Node) node;
|
||||
if (n.valueOf("./@name").equals("openaireId")) {
|
||||
datasourceList.add(prefix + "|" + n.getText());
|
||||
}
|
||||
}
|
||||
|
||||
return datasourceList;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,89 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
|
||||
|
||||
/**
|
||||
* Reads all the entities of the same type (Relation / Results) and saves them in the same folder
|
||||
*
|
||||
*/
|
||||
public class SparkCollectAndSave implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkCollectAndSave.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkCollectAndSave.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_collect_and_save.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath + "/result");
|
||||
run(spark, inputPath, outputPath);
|
||||
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private static void run(SparkSession spark, String inputPath, String outputPath) {
|
||||
Utils
|
||||
.readPath(spark, inputPath + "/result/publication", Result.class)
|
||||
.union(Utils.readPath(spark, inputPath + "/result/dataset", Result.class))
|
||||
.union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", Result.class))
|
||||
.union(Utils.readPath(spark, inputPath + "/result/software", Result.class))
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Overwrite)
|
||||
.json(outputPath + "/result");
|
||||
|
||||
Utils
|
||||
.readPath(spark, inputPath + "/relation/publication", Relation.class)
|
||||
.union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
|
||||
.union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
|
||||
.union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
|
||||
.union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
|
||||
.union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
|
||||
.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath + "/relation");
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
|
||||
/**
|
||||
* Spark Job that fires the dump for the entites
|
||||
*/
|
||||
public class SparkDumpEntitiesJob implements Serializable {
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkDumpEntitiesJob.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
final String communityMapPath = parser.get("communityMapPath");
|
||||
|
||||
Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);
|
||||
|
||||
DumpGraphEntities dg = new DumpGraphEntities();
|
||||
dg.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
/**
|
||||
* Dumps eu.dnetlib.dhp.schema.oaf.Relation in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
|
||||
*/
|
||||
public class SparkDumpRelationJob implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkDumpRelationJob.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkDumpRelationJob.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_relationdump_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
dumpRelation(spark, inputPath, outputPath);
|
||||
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) {
|
||||
Utils
|
||||
.readPath(spark, inputPath, Relation.class)
|
||||
.map(relation -> {
|
||||
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
|
||||
rel
|
||||
.setSource(
|
||||
Node
|
||||
.newInstance(
|
||||
relation.getSource(),
|
||||
ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));
|
||||
|
||||
rel
|
||||
.setTarget(
|
||||
Node
|
||||
.newInstance(
|
||||
relation.getTarget(),
|
||||
ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));
|
||||
|
||||
rel
|
||||
.setReltype(
|
||||
RelType
|
||||
.newInstance(
|
||||
relation.getRelClass(),
|
||||
relation.getSubRelType()));
|
||||
|
||||
Optional
|
||||
.ofNullable(relation.getDataInfo())
|
||||
.ifPresent(
|
||||
datainfo -> rel
|
||||
.setProvenance(
|
||||
Provenance
|
||||
.newInstance(datainfo.getProvenanceaction().getClassname(), datainfo.getTrust())));
|
||||
|
||||
return rel;
|
||||
|
||||
}, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Overwrite)
|
||||
.json(outputPath);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
/**
|
||||
* Spark job that fires the extraction of relations from entities
|
||||
*/
|
||||
public class SparkExtractRelationFromEntities implements Serializable {
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkExtractRelationFromEntities.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkExtractRelationFromEntities.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
final String communityMapPath = parser.get("communityMapPath");
|
||||
|
||||
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||
|
||||
Extractor extractor = new Extractor();
|
||||
extractor.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,161 @@
|
|||
|
||||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.graph;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
/**
|
||||
* Create new Relations between Context Entities and Organizations whose products are associated to the context.
|
||||
* It produces relation such as: organization <-> isRelatedTo <-> context
|
||||
*/
|
||||
public class SparkOrganizationRelation implements Serializable {
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkOrganizationRelation.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkOrganizationRelation.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump_whole/input_organization_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final OrganizationMap organizationMap = new Gson()
|
||||
.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
|
||||
log.info("organization map : {}", new Gson().toJson(organizationMap));
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
extractRelation(spark, inputPath, organizationMap, outputPath);
|
||||
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
|
||||
String outputPath) {
|
||||
Dataset<Relation> relationDataset = Utils.readPath(spark, inputPath, Relation.class);
|
||||
|
||||
relationDataset.createOrReplaceTempView("relation");
|
||||
|
||||
List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList = new ArrayList<>();
|
||||
|
||||
Dataset<MergedRels> mergedRelsDataset = spark
|
||||
.sql(
|
||||
"SELECT target organizationId, source representativeId " +
|
||||
"FROM relation " +
|
||||
"WHERE datainfo.deletedbyinference = false " +
|
||||
"AND relclass = 'merges' " +
|
||||
"AND substr(source, 1, 2) = '20'")
|
||||
.as(Encoders.bean(MergedRels.class));
|
||||
|
||||
mergedRelsDataset.map((MapFunction<MergedRels, MergedRels>) mergedRels -> {
|
||||
if (organizationMap.containsKey(mergedRels.getOrganizationId())) {
|
||||
return mergedRels;
|
||||
}
|
||||
return null;
|
||||
}, Encoders.bean(MergedRels.class))
|
||||
.filter(Objects::nonNull)
|
||||
.collectAsList()
|
||||
.forEach(getMergedRelsConsumer(organizationMap, relList));
|
||||
|
||||
organizationMap
|
||||
.keySet()
|
||||
.forEach(
|
||||
oId -> organizationMap
|
||||
.get(oId)
|
||||
.forEach(community -> addRelations(relList, community, oId)));
|
||||
|
||||
spark
|
||||
.createDataset(relList, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
|
||||
}
|
||||
|
||||
@NotNull
|
||||
private static Consumer<MergedRels> getMergedRelsConsumer(OrganizationMap organizationMap,
|
||||
List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList) {
|
||||
return mergedRels -> {
|
||||
String oId = mergedRels.getOrganizationId();
|
||||
organizationMap
|
||||
.get(oId)
|
||||
.forEach(community -> addRelations(relList, community, mergedRels.getRepresentativeId()));
|
||||
organizationMap.remove(oId);
|
||||
};
|
||||
}
|
||||
|
||||
private static void addRelations(List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList, String community,
|
||||
String organization) {
|
||||
|
||||
String id = Utils.getContextId(community);
|
||||
log.info("create relation for organization: {}", organization);
|
||||
relList
|
||||
.add(
|
||||
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
|
||||
.newInstance(
|
||||
Node.newInstance(id, Constants.CONTEXT_ENTITY),
|
||||
Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
|
||||
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||
Provenance
|
||||
.newInstance(
|
||||
eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
|
||||
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));
|
||||
|
||||
relList
|
||||
.add(
|
||||
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
|
||||
.newInstance(
|
||||
Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
|
||||
Node.newInstance(id, Constants.CONTEXT_ENTITY),
|
||||
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
|
||||
Provenance
|
||||
.newInstance(
|
||||
eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
|
||||
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
|
||||
[
|
||||
|
||||
{
|
||||
"paramName":"is",
|
||||
"paramLongName":"isLookUpUrl",
|
||||
"paramDescription": "URL of the isLookUp Service",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"nn",
|
||||
"paramLongName":"nameNode",
|
||||
"paramDescription": "the name node",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
[
|
||||
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequential file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "hdp",
|
||||
"paramLongName": "hdfsPath",
|
||||
"paramDescription": "the path used to store the output archive",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"nn",
|
||||
"paramLongName":"nameNode",
|
||||
"paramDescription": "the name node",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
[
|
||||
|
||||
{
|
||||
"paramName":"cmp",
|
||||
"paramLongName":"communityMapPath",
|
||||
"paramDescription": "the path to the serialization of the community map",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequential file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"tn",
|
||||
"paramLongName":"resultTableName",
|
||||
"paramDescription": "the name of the result table we are currently working on",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<value>openaire</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,431 @@
|
|||
<workflow-app name="dump_community_products" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookUpUrl</name>
|
||||
<description>the isLookup service endpoint</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>accessToken</name>
|
||||
<description>the access token used for the deposition in Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>connectionUrl</name>
|
||||
<description>the connection url for Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>metadata</name>
|
||||
<description> the metadata associated to the deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>newDeposition</name>
|
||||
<description>true if it is a brand new deposition. false for new version of an old deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>conceptRecordId</name>
|
||||
<description>for new version, the id of the record for the old deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<description>the target hive database name</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<description>hive server jdbc url</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<description>hive server metastore URIs</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="save_community_map"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="save_community_map">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</java>
|
||||
<ok to="fork_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_dump">
|
||||
<path start="dump_publication"/>
|
||||
<path start="dump_dataset"/>
|
||||
<path start="dump_orp"/>
|
||||
<path start="dump_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="dump_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table publication for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table dataset for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_orp">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table ORP for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table software for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="join_dump" to="prepareResultProject"/>
|
||||
|
||||
<action name="prepareResultProject">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Prepare association result subset of project info</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="fork_extendWithProject"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_extendWithProject">
|
||||
<path start="extend_publication"/>
|
||||
<path start="extend_dataset"/>
|
||||
<path start="extend_orp"/>
|
||||
<path start="extend_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="extend_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped publications with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/publication</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="extend_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped dataset with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/dataset</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<action name="extend_orp">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped ORP with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/orp</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<action name="extend_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped software with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/software</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="join_extend" to="splitForCommunities"/>
|
||||
|
||||
<action name="splitForCommunities">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Split dumped result for community</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/ext</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/split</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="make_archive"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="make_archive">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/split</arg>
|
||||
</java>
|
||||
<ok to="send_zenodo"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="send_zenodo">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||
<arg>--newDeposition</arg><arg>${newDeposition}</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -0,0 +1,29 @@
|
|||
[
|
||||
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequential file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "pip",
|
||||
"paramLongName": "preparedInfoPath",
|
||||
"paramDescription": "the path of the association result projectlist",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
[
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequential file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
|
@ -0,0 +1,542 @@
|
|||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"AccessRight": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "Label for the access mode"
|
||||
},
|
||||
"scheme": {
|
||||
"type": "string",
|
||||
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ControlledField": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scheme": {
|
||||
"type": "string",
|
||||
"description": "The scheme for the resource"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "the value in the scheme"
|
||||
}
|
||||
}
|
||||
},
|
||||
"KeyValue": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "Description of key"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "Description of value"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Provenance": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provenance": {
|
||||
"type": "string",
|
||||
"description": "The provenance of the information"
|
||||
},
|
||||
"trust": {
|
||||
"type": "string",
|
||||
"description": "The trust associated to the information"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"author": {
|
||||
"description": "List of authors of the research results",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"affiliation": {
|
||||
"description": "Affiliations of the author",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"description": "One of the affiliations of the author"
|
||||
}
|
||||
},
|
||||
"fullname": {
|
||||
"type": "string",
|
||||
"description": "Fullname of the author"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "First name of the author"
|
||||
},
|
||||
"pid": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/ControlledField"},
|
||||
{"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"}
|
||||
]
|
||||
},
|
||||
"provenance": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Provenance"},
|
||||
{"description": "The provenance of the author's pid"}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "Persistent identifier of the author (e.g. ORCID)"
|
||||
},
|
||||
"rank": {
|
||||
"type": "integer",
|
||||
"description": "Order in which the author appears in the authors list"
|
||||
},
|
||||
"surname": {
|
||||
"type": "string",
|
||||
"description": "Surname of the author"
|
||||
}
|
||||
},
|
||||
"description": "One of the authors of the research result"
|
||||
}
|
||||
},
|
||||
"bestaccessright": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/AccessRight"},
|
||||
{"description": "The most open access right associated to the manifestations of this research result"}
|
||||
]
|
||||
},
|
||||
"codeRepositoryUrl": {
|
||||
"type": "string",
|
||||
"description": "Only for results with type 'software': the URL to the repository with the source code"
|
||||
},
|
||||
"collectedfrom": {
|
||||
"description": "Information about the sources from which the record has been collected",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/KeyValue"},
|
||||
{"description": "Key is the OpenAIRE identifier of the data source, value is its name"}
|
||||
]
|
||||
}
|
||||
},
|
||||
"contactgroup": {
|
||||
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"contactperson": {
|
||||
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"container": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"conferencedate": {
|
||||
"type": "string",
|
||||
"description": "Date of the conference"
|
||||
},
|
||||
"conferenceplace": {
|
||||
"type": "string",
|
||||
"description": "Place of the conference"
|
||||
},
|
||||
"edition": {
|
||||
"type": "string",
|
||||
"description": "Edition of the journal or conference proceeding"
|
||||
},
|
||||
"ep": {
|
||||
"type": "string",
|
||||
"description": "End page"
|
||||
},
|
||||
"iss": {
|
||||
"type": "string",
|
||||
"description": "Journal issue"
|
||||
},
|
||||
"issnLinking": {
|
||||
"type": "string",
|
||||
"description": "Journal linking issn"
|
||||
},
|
||||
"issnOnline": {
|
||||
"type": "string",
|
||||
"description": "Journal online issn"
|
||||
},
|
||||
"issnPrinted": {
|
||||
"type": "string",
|
||||
"description": "Journal printed issn"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name of the journal or conference"
|
||||
},
|
||||
"sp": {
|
||||
"type": "string",
|
||||
"description": "Start page"
|
||||
},
|
||||
"vol": {
|
||||
"type": "string",
|
||||
"description": "Volume"
|
||||
}
|
||||
},
|
||||
"description": "Container has information about the conference or journal where the result has been presented or published"
|
||||
},
|
||||
"context": {
|
||||
"description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "Code identifying the RI/RC"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "Label of the RI/RC"
|
||||
},
|
||||
"provenance": {
|
||||
"description": "Why this result is associated to the RI/RC.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Provenance"}
|
||||
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"contributor": {
|
||||
"description": "Contributors of this result",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"country": {
|
||||
"description": "Country associated to this result",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "ISO 3166-1 alpha-2 country code"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "English label of the country"
|
||||
},
|
||||
"provenance": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Provenance"},
|
||||
{"description": "Why this result is associated to the country."}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"coverage": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"dateofcollection": {
|
||||
"type": "string",
|
||||
"description": "When OpenAIRE collected the record the last time"
|
||||
},
|
||||
"description": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"documentationUrl": {
|
||||
"description": "Only for results with type 'software': URL to the software documentation",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
|
||||
}
|
||||
},
|
||||
"embargoenddate": {
|
||||
"type": "string",
|
||||
"description": "Date when the embargo ends and this result turns Open Access"
|
||||
},
|
||||
"externalReference": {
|
||||
"description": "Links to external resources like entries from thematic databases (e.g. Protein Data Bank)",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"provenance": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Provenance"},
|
||||
{"description": "Why this result is linked to the external resource"}
|
||||
]
|
||||
},
|
||||
"typology": {
|
||||
"type": "string"
|
||||
},
|
||||
"value": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"format": {
|
||||
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"geolocation": {
|
||||
"description": "Geolocation information",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"box": {
|
||||
"type": "string"
|
||||
},
|
||||
"place": {
|
||||
"type": "string"
|
||||
},
|
||||
"point": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "OpenAIRE identifier"
|
||||
},
|
||||
"instance": {
|
||||
"description": "Manifestations (i.e. different versions) of the result. For example: the pre-print and the published versions are two manifestations of the same research result",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"accessright": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/AccessRight"},
|
||||
{"description": "Access right of this instance"}
|
||||
]
|
||||
},
|
||||
"collectedfrom": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/KeyValue"},
|
||||
{"description": "Information about the source from which the instance has been collected. Key is the OpenAIRE identifier of the data source, value is its name"}
|
||||
]
|
||||
},
|
||||
"hostedby": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/KeyValue"},
|
||||
{"description": "Information about the source from which the instance can be viewed or downloaded. Key is the OpenAIRE identifier of the data source, value is its name"}
|
||||
]
|
||||
},
|
||||
"license": {
|
||||
"type": "string",
|
||||
"description": "License applied to the instance"
|
||||
},
|
||||
"publicationdate": {
|
||||
"type": "string",
|
||||
"description": "Publication date of the instance"
|
||||
},
|
||||
"refereed": {
|
||||
"type": "string",
|
||||
"description": "Was the instance subject to peer-review? Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed' (see also https://api.openaire.eu/vocabularies/dnet:review_levels)"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type of the instance. Possible values are listed at https://api.openaire.eu/vocabularies/dnet:publication_resource"
|
||||
},
|
||||
"url": {
|
||||
"description":"Location where the instance is accessible",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"language": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "alpha-3/ISO 639-2 code of the language"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "English label"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lastupdatetimestamp": {
|
||||
"type": "integer",
|
||||
"description": "Timestamp of last update of the record in OpenAIRE"
|
||||
},
|
||||
"maintitle": {
|
||||
"type": "string",
|
||||
"description": "Title"
|
||||
},
|
||||
"originalId": {
|
||||
"description": "Identifiers of the record at the original sources",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"pid": {
|
||||
"description": "Persistent identifiers of the result",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/ControlledField"},
|
||||
{"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result "}
|
||||
]
|
||||
}
|
||||
},
|
||||
"programmingLanguage": {
|
||||
"type": "string",
|
||||
"description": "Only for results with type 'software': the programming language"
|
||||
},
|
||||
"projects": {
|
||||
"description": "List of projects (i.e. grants) that (co-)funded the production of the research results",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"acronym": {
|
||||
"type": "string",
|
||||
"description": "Project acronym"
|
||||
},
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "Grant code"
|
||||
},
|
||||
"funder": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"fundingStream": {
|
||||
"type": "string",
|
||||
"description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)"
|
||||
},
|
||||
"jurisdiction": {
|
||||
"type": "string",
|
||||
"description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name of the funder"
|
||||
},
|
||||
"shortName": {
|
||||
"type": "string",
|
||||
"description": "Short name or acronym of the funder"
|
||||
}
|
||||
},
|
||||
"description": "Information about the funder funding the project"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "OpenAIRE identifier of the project"
|
||||
},
|
||||
"provenance": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Provenance"},
|
||||
{"description": "Why this project is associated to the result"}
|
||||
]
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Title of the project"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"publicationdate": {
|
||||
"type": "string",
|
||||
"description": "Date of publication"
|
||||
},
|
||||
"publisher": {
|
||||
"type": "string",
|
||||
"description": "Publisher"
|
||||
},
|
||||
"size": {
|
||||
"type": "string",
|
||||
"description": "Only for results with type 'dataset': the declared size of the dataset"
|
||||
},
|
||||
"source": {
|
||||
"description": "See definition of Dublin Core field dc:source",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"subjects": {
|
||||
"description": "Keywords associated to the result",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provenance": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Provenance"},
|
||||
{"description": "Why this subject is associated to the result"}
|
||||
]
|
||||
},
|
||||
"subject": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/ControlledField"},
|
||||
{"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary). "}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"subtitle": {
|
||||
"type": "string",
|
||||
"description": "Sub-title of the result"
|
||||
},
|
||||
"tool": {
|
||||
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)"
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version of the result"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
|
||||
[
|
||||
|
||||
{
|
||||
"paramName":"cmp",
|
||||
"paramLongName":"communityMapPath",
|
||||
"paramDescription": "the path to the serialization of the community map",
|
||||
"paramRequired": false
|
||||
},
|
||||
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequential file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
[
|
||||
{
|
||||
"paramName":"nd",
|
||||
"paramLongName":"newDeposition",
|
||||
"paramDescription": "if it is a new deposition (true) or a new version (false)",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"cri",
|
||||
"paramLongName":"conceptRecordId",
|
||||
"paramDescription": "The id of the concept record for a new version",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"cmp",
|
||||
"paramLongName":"communityMapPath",
|
||||
"paramDescription": "the path to the serialization of the community map",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"hdfsp",
|
||||
"paramLongName":"hdfsPath",
|
||||
"paramDescription": "the path of the folder to find files to send to Zenodo",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "hdfsnn",
|
||||
"paramLongName": "hdfsNameNode",
|
||||
"paramDescription": "the name node",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "at",
|
||||
"paramLongName": "accessToken",
|
||||
"paramDescription": "the access token for the deposition",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"cu",
|
||||
"paramLongName":"connectionUrl",
|
||||
"paramDescription": "the url to connect to deposit",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"m",
|
||||
"paramLongName":"metadata",
|
||||
"paramDescription": "metadata associated to the deposition",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
|
@ -0,0 +1,24 @@
|
|||
[
|
||||
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequential file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
[
|
||||
|
||||
{
|
||||
"paramName":"is",
|
||||
"paramLongName":"isLookUpUrl",
|
||||
"paramDescription": "URL of the isLookUp Service",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "hdfs",
|
||||
"paramLongName": "hdfsPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "nn",
|
||||
"paramLongName": "hdfsNameNode",
|
||||
"paramDescription": "the name node",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue