orcid-no-doi #43
|
@ -19,7 +19,7 @@
|
|||
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_switch_statement" value="common_lines"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="false"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
|
||||
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_enum_constant_declaration" value="common_lines"/>
|
||||
|
|
|
@ -87,6 +87,11 @@
|
|||
<groupId>org.postgresql</groupId>
|
||||
<artifactId>postgresql</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.squareup.okhttp3</groupId>
|
||||
<artifactId>okhttp</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import okhttp3.MediaType;
|
||||
import okhttp3.RequestBody;
|
||||
import okhttp3.internal.Util;
|
||||
import okio.BufferedSink;
|
||||
import okio.Okio;
|
||||
import okio.Source;
|
||||
|
||||
public class InputStreamRequestBody extends RequestBody {
|
||||
|
||||
private InputStream inputStream;
|
||||
private MediaType mediaType;
|
||||
private long lenght;
|
||||
|
||||
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
|
||||
|
||||
return new InputStreamRequestBody(inputStream, mediaType, len);
|
||||
}
|
||||
|
||||
private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
|
||||
this.inputStream = inputStream;
|
||||
this.mediaType = mediaType;
|
||||
this.lenght = len;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MediaType contentType() {
|
||||
return mediaType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long contentLength() {
|
||||
|
||||
return lenght;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(BufferedSink sink) throws IOException {
|
||||
Source source = null;
|
||||
try {
|
||||
source = Okio.source(inputStream);
|
||||
sink.writeAll(source);
|
||||
} finally {
|
||||
Util.closeQuietly(source);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
/**
 * Signals that the requested concept record id was not found among the depositions
 * returned by Zenodo.
 * <p>
 * It is a checked {@link Exception} (rather than a bare {@link Throwable}) so that ordinary
 * {@code catch (Exception e)} handlers see it.
 */
public class MissingConceptDoiException extends Exception {

    /**
     * @param message description of the missing concept record id
     */
    public MissingConceptDoiException(String message) {
        super(message);
    }
}
|
|
@ -0,0 +1,264 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.IOException;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
|
||||
import okhttp3.*;
|
||||
|
||||
public class ZenodoAPIClient implements Serializable {
|
||||
|
||||
String urlString;
|
||||
String bucket;
|
||||
|
||||
String deposition_id;
|
||||
String access_token;
|
||||
|
||||
public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
|
||||
|
||||
public String getUrlString() {
|
||||
return urlString;
|
||||
}
|
||||
|
||||
public void setUrlString(String urlString) {
|
||||
this.urlString = urlString;
|
||||
}
|
||||
|
||||
public String getBucket() {
|
||||
return bucket;
|
||||
}
|
||||
|
||||
public void setBucket(String bucket) {
|
||||
this.bucket = bucket;
|
||||
}
|
||||
|
||||
public void setDeposition_id(String deposition_id) {
|
||||
this.deposition_id = deposition_id;
|
||||
}
|
||||
|
||||
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
|
||||
|
||||
this.urlString = urlString;
|
||||
this.access_token = access_token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
*/
|
||||
public int newDeposition() throws IOException {
|
||||
String json = "{}";
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, json);
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.post(body)
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
// Get response body
|
||||
json = response.body().string();
|
||||
|
||||
ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class);
|
||||
this.bucket = newSubmission.getLinks().getBucket();
|
||||
this.deposition_id = newSubmission.getId();
|
||||
|
||||
return response.code();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload files in Zenodo.
|
||||
* @param is the inputStream for the file to upload
|
||||
* @param file_name the name of the file as it will appear on Zenodo
|
||||
* @param len the size of the file
|
||||
* @return the response code
|
||||
*/
|
||||
public int uploadIS(InputStream is, String file_name, long len) throws IOException {
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(bucket + "/" + file_name)
|
||||
.addHeader("Content-Type", "application/zip") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
return response.code();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Associates metadata information to the current deposition
|
||||
* @param metadata the metadata
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
*/
|
||||
public int sendMretadata(String metadata) throws IOException {
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, metadata);
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.put(body)
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
return response.code();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* To publish the current deposition. It works for both new deposition or new version of an old deposition
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
*/
|
||||
public int publish() throws IOException {
|
||||
|
||||
String json = "{}";
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id + "/actions/publish")
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
return response.code();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* To create a new version of an already published deposition.
|
||||
* It sets the deposition_id and the bucket to be used for the new version.
|
||||
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is
|
||||
* the last part of the url for the DOI Zenodo suggests to use to cite all versions:
|
||||
* DOI: 10.xxx/zenodo.656930 concept_rec_id = 656930
|
||||
* @return response code
|
||||
* @throws IOException
|
||||
* @throws MissingConceptDoiException
|
||||
*/
|
||||
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
||||
setDepositionId(concept_rec_id);
|
||||
String json = "{}";
|
||||
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
|
||||
String latest_draft = zenodoModel.getLinks().getLatest_draft();
|
||||
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
|
||||
bucket = getBucket(latest_draft);
|
||||
return response.code();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
||||
|
||||
ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
|
||||
|
||||
for (ZenodoModel zm : zenodoModelList) {
|
||||
if (zm.getConceptrecid().equals(concept_rec_id)) {
|
||||
deposition_id = zm.getId();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
|
||||
|
||||
}
|
||||
|
||||
private String getPrevDepositions() throws IOException {
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(urlString)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.get()
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
return response.body().string();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private String getBucket(String url) throws IOException {
|
||||
OkHttpClient httpClient = new OkHttpClient();
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(url)
|
||||
.addHeader("Content-Type", "application/json") // add request headers
|
||||
.addHeader("Authorization", "Bearer " + access_token)
|
||||
.get()
|
||||
.build();
|
||||
|
||||
try (Response response = httpClient.newCall(request).execute()) {
|
||||
|
||||
if (!response.isSuccessful())
|
||||
throw new IOException("Unexpected code " + response + response.body().string());
|
||||
|
||||
// Get response body
|
||||
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
|
||||
|
||||
return zenodoModel.getLinks().getBucket();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
/**
 * A community entry of the Zenodo deposition metadata, identified by its identifier
 * (e.g. {@code "openaire-research-graph"}).
 * <p>
 * Serializable like the other classes of this package, since the serializable
 * {@code Metadata} holds a list of communities.
 */
public class Community implements java.io.Serializable {

    private String identifier;

    public String getIdentifier() {
        return identifier;
    }

    public void setIdentifier(String identifier) {
        this.identifier = identifier;
    }
}
|
|
@ -0,0 +1,47 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
/**
 * A creator entry of the Zenodo deposition metadata: name, affiliation and optional ORCID.
 * <p>
 * Serializable like the other classes of this package, since the serializable
 * {@code Metadata} holds a list of creators.
 */
public class Creator implements java.io.Serializable {

    private String affiliation;
    private String name;
    private String orcid;

    public String getAffiliation() {
        return affiliation;
    }

    public void setAffiliation(String affiliation) {
        this.affiliation = affiliation;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getOrcid() {
        return orcid;
    }

    public void setOrcid(String orcid) {
        this.orcid = orcid;
    }

    /**
     * Builds a creator from the given values; any of them may be null, in which case the
     * corresponding property stays null.
     *
     * @param name the creator name
     * @param affiliation the creator affiliation
     * @param orcid the creator ORCID
     * @return the new creator
     */
    public static Creator newInstance(String name, String affiliation, String orcid) {
        Creator c = new Creator();
        // fields default to null, so assigning a null argument needs no guard
        c.name = name;
        c.affiliation = affiliation;
        c.orcid = orcid;
        return c;
    }
}
|
|
@ -0,0 +1,58 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import net.minidev.json.annotate.JsonIgnore;
|
||||
|
||||
/**
 * A file entry of a Zenodo deposition: checksum, file name, size and id.
 */
public class File implements Serializable {

    private String checksum;
    private String filename;
    private long filesize;
    private String id;

    // The "links" property is intentionally not mapped. The annotation that used to sit above
    // its commented-out field has been removed: it was being applied to getChecksum() instead.

    public String getChecksum() {
        return checksum;
    }

    public void setChecksum(String checksum) {
        this.checksum = checksum;
    }

    public String getFilename() {
        return filename;
    }

    public void setFilename(String filename) {
        this.filename = filename;
    }

    public long getFilesize() {
        return filesize;
    }

    public void setFilesize(long filesize) {
        this.filesize = filesize;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }
}
|
|
@ -0,0 +1,23 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * A grant entry of the Zenodo deposition metadata, carrying only the grant id.
 */
public class Grant implements Serializable {

    private String id;

    /**
     * Factory for a grant with the given id.
     *
     * @param id the grant id, stored as is
     * @return the new grant
     */
    public static Grant newInstance(String id) {
        final Grant grant = new Grant();
        grant.setId(id);
        return grant;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }
}
|
|
@ -0,0 +1,92 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * The links section of a Zenodo deposition. Every property is a plain string
 * with a getter and a setter; no validation is performed.
 */
public class Links implements Serializable {

    private String bucket;
    private String discard;
    private String edit;
    private String files;
    private String html;
    private String latest_draft;
    private String latest_draft_html;
    private String publish;
    private String self;

    /** @return the bucket link */
    public String getBucket() {
        return this.bucket;
    }

    public void setBucket(String bucket) {
        this.bucket = bucket;
    }

    /** @return the discard link */
    public String getDiscard() {
        return this.discard;
    }

    public void setDiscard(String discard) {
        this.discard = discard;
    }

    /** @return the edit link */
    public String getEdit() {
        return this.edit;
    }

    public void setEdit(String edit) {
        this.edit = edit;
    }

    /** @return the files link */
    public String getFiles() {
        return this.files;
    }

    public void setFiles(String files) {
        this.files = files;
    }

    /** @return the html link */
    public String getHtml() {
        return this.html;
    }

    public void setHtml(String html) {
        this.html = html;
    }

    /** @return the latest draft link */
    public String getLatest_draft() {
        return this.latest_draft;
    }

    public void setLatest_draft(String latest_draft) {
        this.latest_draft = latest_draft;
    }

    /** @return the latest draft html link */
    public String getLatest_draft_html() {
        return this.latest_draft_html;
    }

    public void setLatest_draft_html(String latest_draft_html) {
        this.latest_draft_html = latest_draft_html;
    }

    /** @return the publish link */
    public String getPublish() {
        return this.publish;
    }

    public void setPublish(String publish) {
        this.publish = publish;
    }

    /** @return the self link */
    public String getSelf() {
        return this.self;
    }

    public void setSelf(String self) {
        this.self = self;
    }
}
|
|
@ -0,0 +1,153 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public class Metadata implements Serializable {
|
||||
|
||||
private String access_right;
|
||||
private List<Community> communities;
|
||||
private List<Creator> creators;
|
||||
private String description;
|
||||
private String doi;
|
||||
private List<Grant> grants;
|
||||
private List<String> keywords;
|
||||
private String language;
|
||||
private String license;
|
||||
private PrereserveDoi prereserve_doi;
|
||||
private String publication_date;
|
||||
private List<String> references;
|
||||
private List<RelatedIdentifier> related_identifiers;
|
||||
private String title;
|
||||
private String upload_type;
|
||||
private String version;
|
||||
|
||||
public String getUpload_type() {
|
||||
return upload_type;
|
||||
}
|
||||
|
||||
public void setUpload_type(String upload_type) {
|
||||
this.upload_type = upload_type;
|
||||
}
|
||||
|
||||
public String getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public void setVersion(String version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
public String getAccess_right() {
|
||||
return access_right;
|
||||
}
|
||||
|
||||
public void setAccess_right(String access_right) {
|
||||
this.access_right = access_right;
|
||||
}
|
||||
|
||||
public List<Community> getCommunities() {
|
||||
return communities;
|
||||
}
|
||||
|
||||
public void setCommunities(List<Community> communities) {
|
||||
this.communities = communities;
|
||||
}
|
||||
|
||||
public List<Creator> getCreators() {
|
||||
return creators;
|
||||
}
|
||||
|
||||
public void setCreators(List<Creator> creators) {
|
||||
this.creators = creators;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public void setDescription(String description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
public String getDoi() {
|
||||
return doi;
|
||||
}
|
||||
|
||||
public void setDoi(String doi) {
|
||||
this.doi = doi;
|
||||
}
|
||||
|
||||
public List<Grant> getGrants() {
|
||||
return grants;
|
||||
}
|
||||
|
||||
public void setGrants(List<Grant> grants) {
|
||||
this.grants = grants;
|
||||
}
|
||||
|
||||
public List<String> getKeywords() {
|
||||
return keywords;
|
||||
}
|
||||
|
||||
public void setKeywords(List<String> keywords) {
|
||||
this.keywords = keywords;
|
||||
}
|
||||
|
||||
public String getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public void setLanguage(String language) {
|
||||
this.language = language;
|
||||
}
|
||||
|
||||
public String getLicense() {
|
||||
return license;
|
||||
}
|
||||
|
||||
public void setLicense(String license) {
|
||||
this.license = license;
|
||||
}
|
||||
|
||||
public PrereserveDoi getPrereserve_doi() {
|
||||
return prereserve_doi;
|
||||
}
|
||||
|
||||
public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
|
||||
this.prereserve_doi = prereserve_doi;
|
||||
}
|
||||
|
||||
public String getPublication_date() {
|
||||
return publication_date;
|
||||
}
|
||||
|
||||
public void setPublication_date(String publication_date) {
|
||||
this.publication_date = publication_date;
|
||||
}
|
||||
|
||||
public List<String> getReferences() {
|
||||
return references;
|
||||
}
|
||||
|
||||
public void setReferences(List<String> references) {
|
||||
this.references = references;
|
||||
}
|
||||
|
||||
public List<RelatedIdentifier> getRelated_identifiers() {
|
||||
return related_identifiers;
|
||||
}
|
||||
|
||||
public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
|
||||
this.related_identifiers = related_identifiers;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * The pre-reserved DOI of a Zenodo deposition: the DOI itself and the record id.
 * Both values are stored and returned unchanged.
 */
public class PrereserveDoi implements Serializable {

    private String doi;
    private String recid;

    /** @return the DOI */
    public String getDoi() {
        return this.doi;
    }

    public void setDoi(String doi) {
        this.doi = doi;
    }

    /** @return the record id */
    public String getRecid() {
        return this.recid;
    }

    public void setRecid(String recid) {
        this.recid = recid;
    }
}
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * A related identifier of a Zenodo deposition: identifier, relation, resource
 * type and scheme, all kept as plain strings without validation.
 */
public class RelatedIdentifier implements Serializable {

    private String identifier;
    private String relation;
    private String resource_type;
    private String scheme;

    /** @return the identifier */
    public String getIdentifier() {
        return this.identifier;
    }

    public void setIdentifier(String identifier) {
        this.identifier = identifier;
    }

    /** @return the relation */
    public String getRelation() {
        return this.relation;
    }

    public void setRelation(String relation) {
        this.relation = relation;
    }

    /** @return the resource type */
    public String getResource_type() {
        return this.resource_type;
    }

    public void setResource_type(String resource_type) {
        this.resource_type = resource_type;
    }

    /** @return the scheme */
    public String getScheme() {
        return this.scheme;
    }

    public void setScheme(String scheme) {
        this.scheme = scheme;
    }
}
|
|
@ -0,0 +1,118 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public class ZenodoModel implements Serializable {
|
||||
|
||||
private String conceptrecid;
|
||||
private String created;
|
||||
|
||||
private List<File> files;
|
||||
private String id;
|
||||
private Links links;
|
||||
private Metadata metadata;
|
||||
private String modified;
|
||||
private String owner;
|
||||
private String record_id;
|
||||
private String state;
|
||||
private boolean submitted;
|
||||
private String title;
|
||||
|
||||
public String getConceptrecid() {
|
||||
return conceptrecid;
|
||||
}
|
||||
|
||||
public void setConceptrecid(String conceptrecid) {
|
||||
this.conceptrecid = conceptrecid;
|
||||
}
|
||||
|
||||
public String getCreated() {
|
||||
return created;
|
||||
}
|
||||
|
||||
public void setCreated(String created) {
|
||||
this.created = created;
|
||||
}
|
||||
|
||||
public List<File> getFiles() {
|
||||
return files;
|
||||
}
|
||||
|
||||
public void setFiles(List<File> files) {
|
||||
this.files = files;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public Links getLinks() {
|
||||
return links;
|
||||
}
|
||||
|
||||
public void setLinks(Links links) {
|
||||
this.links = links;
|
||||
}
|
||||
|
||||
public Metadata getMetadata() {
|
||||
return metadata;
|
||||
}
|
||||
|
||||
public void setMetadata(Metadata metadata) {
|
||||
this.metadata = metadata;
|
||||
}
|
||||
|
||||
public String getModified() {
|
||||
return modified;
|
||||
}
|
||||
|
||||
public void setModified(String modified) {
|
||||
this.modified = modified;
|
||||
}
|
||||
|
||||
public String getOwner() {
|
||||
return owner;
|
||||
}
|
||||
|
||||
public void setOwner(String owner) {
|
||||
this.owner = owner;
|
||||
}
|
||||
|
||||
public String getRecord_id() {
|
||||
return record_id;
|
||||
}
|
||||
|
||||
public void setRecord_id(String record_id) {
|
||||
this.record_id = record_id;
|
||||
}
|
||||
|
||||
public String getState() {
|
||||
return state;
|
||||
}
|
||||
|
||||
public void setState(String state) {
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
public boolean isSubmitted() {
|
||||
return submitted;
|
||||
}
|
||||
|
||||
public void setSubmitted(boolean submitted) {
|
||||
this.submitted = submitted;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api.zenodo;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class ZenodoModelList extends ArrayList<ZenodoModel> {
|
||||
}
|
|
@ -1,15 +1,22 @@
|
|||
|
||||
package eu.dnetlib.dhp.utils;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.xml.ws.BindingProvider;
|
||||
|
||||
import org.apache.cxf.jaxws.JaxWsProxyFactoryBean;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
public class ISLookupClientFactory {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ISLookupClientFactory.class);
|
||||
private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
|
||||
|
||||
private static int requestTimeout = 60000 * 10;
|
||||
private static int connectTimeout = 60000 * 10;
|
||||
|
||||
public static ISLookUpService getLookUpService(final String isLookupUrl) {
|
||||
return getServiceStub(ISLookUpService.class, isLookupUrl);
|
||||
|
@ -21,6 +28,25 @@ public class ISLookupClientFactory {
|
|||
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
|
||||
jaxWsProxyFactory.setServiceClass(clazz);
|
||||
jaxWsProxyFactory.setAddress(endpoint);
|
||||
return (T) jaxWsProxyFactory.create();
|
||||
|
||||
final T service = (T) jaxWsProxyFactory.create();
|
||||
|
||||
if (service instanceof BindingProvider) {
|
||||
log
|
||||
.info(
|
||||
"setting timeouts for {} to requestTimeout: {}, connectTimeout: {}",
|
||||
BindingProvider.class.getName(), requestTimeout, connectTimeout);
|
||||
|
||||
Map<String, Object> requestContext = ((BindingProvider) service).getRequestContext();
|
||||
|
||||
requestContext.put("com.sun.xml.internal.ws.request.timeout", requestTimeout);
|
||||
requestContext.put("com.sun.xml.internal.ws.connect.timeout", connectTimeout);
|
||||
requestContext.put("com.sun.xml.ws.request.timeout", requestTimeout);
|
||||
requestContext.put("com.sun.xml.ws.connect.timeout", connectTimeout);
|
||||
requestContext.put("javax.xml.ws.client.receiveTimeout", requestTimeout);
|
||||
requestContext.put("javax.xml.ws.client.connectionTimeout", connectTimeout);
|
||||
}
|
||||
|
||||
return service;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.api;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@Disabled
|
||||
public class ZenodoAPIClientTest {
|
||||
|
||||
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
||||
private final String ACCESS_TOKEN = "";
|
||||
|
||||
private final String CONCEPT_REC_ID = "657113";
|
||||
|
||||
@Test
|
||||
public void testNewDeposition() throws IOException {
|
||||
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
Assertions.assertEquals(201, client.newDeposition());
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
|
||||
|
||||
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
|
||||
|
||||
Assertions.assertEquals(200, client.sendMretadata(metadata));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
||||
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
|
||||
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/newVersion")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
||||
|
||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||
ACCESS_TOKEN);
|
||||
|
||||
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
||||
|
||||
File file = new File(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
|
||||
.getPath());
|
||||
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
|
||||
|
||||
Assertions.assertEquals(202, client.publish());
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"metadata":{"access_right":"open","communities":[{"identifier":"openaire-research-graph"}],"creators":[{"affiliation":"ISTI - CNR","name":"Bardi, Alessia","orcid":"0000-0002-1112-1292"},{"affiliation":"eifl", "name":"Kuchma, Iryna"},{"affiliation":"BIH", "name":"Brobov, Evgeny"},{"affiliation":"GIDIF RBM", "name":"Truccolo, Ivana"},{"affiliation":"unesp", "name":"Monteiro, Elizabete"},{"affiliation":"und", "name":"Casalegno, Carlotta"},{"affiliation":"CARL ABRC", "name":"Clary, Erin"},{"affiliation":"The University of Edimburgh", "name":"Romanowski, Andrew"},{"affiliation":"ISTI - CNR", "name":"Pavone, Gina"},{"affiliation":"ISTI - CNR", "name":"Artini, Michele"},{"affiliation":"ISTI - CNR","name":"Atzori, Claudio","orcid":"0000-0001-9613-6639"},{"affiliation":"University of Bielefeld","name":"Bäcker, Amelie","orcid":"0000-0001-6015-2063"},{"affiliation":"ISTI - CNR","name":"Baglioni, Miriam","orcid":"0000-0002-2273-9004"},{"affiliation":"University of Bielefeld","name":"Czerniak, Andreas","orcid":"0000-0003-3883-4169"},{"affiliation":"ISTI - CNR","name":"De Bonis, Michele"},{"affiliation":"Athena Research and Innovation Centre","name":"Dimitropoulos, Harry"},{"affiliation":"Athena Research and Innovation Centre","name":"Foufoulas, Ioannis"},{"affiliation":"University of Warsaw","name":"Horst, Marek"},{"affiliation":"Athena Research and Innovation Centre","name":"Iatropoulou, Katerina"},{"affiliation":"University of Warsaw","name":"Jacewicz, Przemyslaw"},{"affiliation":"Athena Research and Innovation Centre","name":"Kokogiannaki, Argiro", "orcid":"0000-0002-3880-0244"},{"affiliation":"ISTI - CNR","name":"La Bruzzo, Sandro","orcid":"0000-0003-2855-1245"},{"affiliation":"ISTI - CNR","name":"Lazzeri, Emma"},{"affiliation":"University of Bielefeld","name":"Löhden, Aenne"},{"affiliation":"ISTI - CNR","name":"Manghi, Paolo","orcid":"0000-0001-7291-3210"},{"affiliation":"ISTI - CNR","name":"Mannocci, Andrea","orcid":"0000-0002-5193-7851"},{"affiliation":"Athena Research 
and Innovation Center","name":"Manola, Natalia"},{"affiliation":"ISTI - CNR","name":"Ottonello, Enrico"},{"affiliation":"University of Bielefeld","name":"Shirrwagen, Jochen"}],"description":"\\u003cp\\u003eThis dump provides access to the metadata records of publications, research data, software and projects that may be relevant to the Corona Virus Disease (COVID-19) fight. The dump contains records of the OpenAIRE COVID-19 Gateway (https://covid-19.openaire.eu/), identified via full-text mining and inference techniques applied to the OpenAIRE Research Graph (https://explore.openaire.eu/). The Graph is one of the largest Open Access collections of metadata records and links between publications, datasets, software, projects, funders, and organizations, aggregating 12,000+ scientific data sources world-wide, among which the Covid-19 data sources Zenodo COVID-19 Community, WHO (World Health Organization), BIP! FInder for COVID-19, Protein Data Bank, Dimensions, scienceOpen, and RSNA. \\u003cp\\u003eThe dump consists of a gzip file containing one json per line. Each json is compliant to the schema available at https://doi.org/10.5281/zenodo.3974226\\u003c/p\\u003e ","title":"OpenAIRE Covid-19 publications, datasets, software and projects metadata.","upload_type":"dataset","version":"1.0"}}
|
|
@ -0,0 +1 @@
|
|||
This is a test for a new deposition
|
|
@ -0,0 +1 @@
|
|||
This is a test for a new version of an old deposition
|
|
@ -0,0 +1,2 @@
|
|||
This is a test for a new version of an old deposition. This should replace the other new version. I expect to have only two
|
||||
files in the deposition
|
|
@ -7,6 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|||
|
||||
public class ModelConstants {
|
||||
|
||||
public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
|
||||
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
|
||||
public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
|
||||
public static final String DNET_ACCESS_MODES = "dnet:access_modes";
|
||||
|
|
|
@ -79,6 +79,15 @@ public class ModelSupport {
|
|||
entityIdPrefix.put("result", "50");
|
||||
}
|
||||
|
||||
/**
 * Lookup from the two-digit identifier prefix to the name of the entity type it denotes.
 * NOTE(review): looks like the inverse of {@code entityIdPrefix} ("result" -> "50" is visible just above) -
 * confirm the remaining entries stay in sync with that map.
 */
public static final Map<String, String> idPrefixEntity = Maps.newHashMap();

static {
    idPrefixEntity.put("10", "datasource");
    idPrefixEntity.put("20", "organization");
    idPrefixEntity.put("40", "project");
    idPrefixEntity.put("50", "result");
}
|
||||
|
||||
public static final Map<String, RelationInverse> relationInverseMap = Maps.newHashMap();
|
||||
|
||||
static {
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Article Processing Charge (APC) information. Not dumped in this release.
 * <ul>
 * <li>currency - of type String, stores the currency of the APC</li>
 * <li>amount - of type String, stores the charged amount</li>
 * </ul>
 */
public class APC implements Serializable {

    /** Currency of the APC. */
    private String currency;

    /** Charged amount, kept as a String. */
    private String amount;

    /** @return the currency of the APC, or {@code null} when not set */
    public String getCurrency() {
        return currency;
    }

    /** @param currency the currency of the APC */
    public void setCurrency(String currency) {
        this.currency = currency;
    }

    /** @return the charged amount, or {@code null} when not set */
    public String getAmount() {
        return amount;
    }

    /** @param amount the charged amount */
    public void setAmount(String amount) {
        this.amount = amount;
    }
}
|
|
@ -0,0 +1,31 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
/**
|
||||
* AccessRight. Used to represent the result access rights. It extends the eu.dnet.lib.dhp.schema.dump.oaf.Qualifier
|
||||
* element with a parameter scheme of type String to store the scheme. Values for this element are found against the
|
||||
* COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get
|
||||
* the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the
|
||||
* COAR code will be used to set the label parameter. The scheme value will always be the one referring to the COAR
|
||||
* access right scheme
|
||||
*/
|
||||
public class AccessRight extends Qualifier {
|
||||
|
||||
private String scheme;
|
||||
|
||||
public String getScheme() {
|
||||
return scheme;
|
||||
}
|
||||
|
||||
public void setScheme(String scheme) {
|
||||
this.scheme = scheme;
|
||||
}
|
||||
|
||||
public static AccessRight newInstance(String code, String label, String scheme) {
|
||||
AccessRight ar = new AccessRight();
|
||||
ar.setCode(code);
|
||||
ar.setLabel(label);
|
||||
ar.setScheme(scheme);
|
||||
return ar;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Used to represent the generic author of the result. It has six parameters: - name of type String to store the given
|
||||
* name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of
|
||||
* type String to store the family name of the author. The value for this parameter corresponds to
|
||||
* eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for
|
||||
* this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank on
|
||||
* the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author
|
||||
* rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the
|
||||
* moment only ORCID identifiers will be dumped. - The id element is instantiated by using the following values in the
|
||||
* eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is
|
||||
* instantiated only if the dataInfo is set for the pid in the result to be dumped. The provenance element is
|
||||
* instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: *
|
||||
* dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust
|
||||
*/
|
||||
public class Author implements Serializable {
|
||||
|
||||
private String fullname;
|
||||
|
||||
private String name;
|
||||
|
||||
private String surname;
|
||||
|
||||
private Integer rank;
|
||||
|
||||
private Pid pid;
|
||||
|
||||
public String getFullname() {
|
||||
return fullname;
|
||||
}
|
||||
|
||||
public void setFullname(String fullname) {
|
||||
this.fullname = fullname;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getSurname() {
|
||||
return surname;
|
||||
}
|
||||
|
||||
public void setSurname(String surname) {
|
||||
this.surname = surname;
|
||||
}
|
||||
|
||||
public Integer getRank() {
|
||||
return rank;
|
||||
}
|
||||
|
||||
public void setRank(Integer rank) {
|
||||
this.rank = rank;
|
||||
}
|
||||
|
||||
public Pid getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(Pid pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,136 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
 * To store information about the conference or journal where the result has been presented or published. It
 * contains eleven parameters, each of type String and each corresponding to the parameter with the same name of
 * eu.dnetlib.dhp.schema.oaf.Journal:
 * <ul>
 * <li>name - the name of the journal or conference</li>
 * <li>issnPrinted - the journal printed issn</li>
 * <li>issnOnline - the journal online issn</li>
 * <li>issnLinking - the journal linking issn</li>
 * <li>ep - the end page</li>
 * <li>iss - the journal issue</li>
 * <li>sp - the start page</li>
 * <li>vol - the volume</li>
 * <li>edition - the edition of the journal or conference proceeding</li>
 * <li>conferenceplace - the place of the conference</li>
 * <li>conferencedate - the date of the conference</li>
 * </ul>
 */
public class Container implements Serializable {

    private String name;

    private String issnPrinted;

    private String issnOnline;

    private String issnLinking;

    private String ep;

    private String iss;

    private String sp;

    private String vol;

    private String edition;

    private String conferenceplace;

    private String conferencedate;

    /** @return the name of the journal or conference */
    public String getName() {
        return name;
    }

    /** @param name the name of the journal or conference */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the printed issn */
    public String getIssnPrinted() {
        return issnPrinted;
    }

    /** @param issnPrinted the printed issn */
    public void setIssnPrinted(String issnPrinted) {
        this.issnPrinted = issnPrinted;
    }

    /** @return the online issn */
    public String getIssnOnline() {
        return issnOnline;
    }

    /** @param issnOnline the online issn */
    public void setIssnOnline(String issnOnline) {
        this.issnOnline = issnOnline;
    }

    /** @return the linking issn */
    public String getIssnLinking() {
        return issnLinking;
    }

    /** @param issnLinking the linking issn */
    public void setIssnLinking(String issnLinking) {
        this.issnLinking = issnLinking;
    }

    /** @return the end page */
    public String getEp() {
        return ep;
    }

    /** @param ep the end page */
    public void setEp(String ep) {
        this.ep = ep;
    }

    /** @return the issue */
    public String getIss() {
        return iss;
    }

    /** @param iss the issue */
    public void setIss(String iss) {
        this.iss = iss;
    }

    /** @return the start page */
    public String getSp() {
        return sp;
    }

    /** @param sp the start page */
    public void setSp(String sp) {
        this.sp = sp;
    }

    /** @return the volume */
    public String getVol() {
        return vol;
    }

    /** @param vol the volume */
    public void setVol(String vol) {
        this.vol = vol;
    }

    /** @return the edition */
    public String getEdition() {
        return edition;
    }

    /** @param edition the edition */
    public void setEdition(String edition) {
        this.edition = edition;
    }

    /** @return the place of the conference */
    public String getConferenceplace() {
        return conferenceplace;
    }

    /** @param conferenceplace the place of the conference */
    public void setConferenceplace(String conferenceplace) {
        this.conferenceplace = conferenceplace;
    }

    /** @return the date of the conference */
    public String getConferencedate() {
        return conferencedate;
    }

    /** @param conferencedate the date of the conference */
    public void setConferencedate(String conferencedate) {
        this.conferencedate = conferencedate;
    }

}
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * To represent the information described by a scheme and a value in that scheme (i.e. pid). It has two parameters:
 * <ul>
 * <li>scheme - of type String, stores the scheme</li>
 * <li>value - of type String, stores the value in that scheme</li>
 * </ul>
 */
public class ControlledField implements Serializable {

    private String scheme;

    private String value;

    /** @return the scheme, or {@code null} when not set */
    public String getScheme() {
        return scheme;
    }

    /** @param scheme the scheme the value belongs to */
    public void setScheme(String scheme) {
        this.scheme = scheme;
    }

    /** @return the value, or {@code null} when not set */
    public String getValue() {
        return value;
    }

    /** @param value the value in the scheme */
    public void setValue(String value) {
        this.value = value;
    }

    /**
     * Builds a ControlledField with both values set.
     *
     * @param scheme the scheme to store
     * @param value the value to store
     * @return a new ControlledField instance
     */
    public static ControlledField newInstance(String scheme, String value) {
        ControlledField cf = new ControlledField();
        cf.setScheme(scheme);
        cf.setValue(value);
        return cf;
    }
}
|
|
@ -0,0 +1,37 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
/**
|
||||
* Represents the country associated to this result. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a
|
||||
* provenance parameter of type eu.dnetlib.dhp.schema.dumo.oaf.Provenance. The country in not mapped if its value in the
|
||||
* result reprensented in the internal format is Unknown. The value for this element correspond to: - code corresponds
|
||||
* to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of
|
||||
* eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be
|
||||
* dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with
|
||||
* datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust
|
||||
*/
|
||||
public class Country extends Qualifier {
|
||||
|
||||
private Provenance provenance;
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public static Country newInstance(String code, String label, Provenance provenance) {
|
||||
Country c = new Country();
|
||||
c.setProvenance(provenance);
|
||||
c.setCode(code);
|
||||
c.setLabel(label);
|
||||
return c;
|
||||
}
|
||||
|
||||
public static Country newInstance(String code, String label, String provenance, String trust) {
|
||||
return newInstance(code, label, Provenance.newInstance(provenance, trust));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Describes a funder through three String parameters: shortName, name and jurisdiction.
 */
public class Funder implements Serializable {

    private String shortName;

    private String name;

    private String jurisdiction;

    /** @return the jurisdiction, or {@code null} when not set */
    public String getJurisdiction() {
        return jurisdiction;
    }

    /** @param jurisdiction the jurisdiction to store */
    public void setJurisdiction(String jurisdiction) {
        this.jurisdiction = jurisdiction;
    }

    /** @return the short name, or {@code null} when not set */
    public String getShortName() {
        return shortName;
    }

    /** @param shortName the short name to store */
    public void setShortName(String shortName) {
        this.shortName = shortName;
    }

    /** @return the name, or {@code null} when not set */
    public String getName() {
        return name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }
}
|
|
@ -0,0 +1,53 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
/**
|
||||
* Represents the geolocation information. It has three parameters: - point of type String to store the point
|
||||
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box ot type String to store the box
|
||||
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place
|
||||
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place
|
||||
*/
|
||||
public class GeoLocation implements Serializable {
|
||||
|
||||
private String point;
|
||||
|
||||
private String box;
|
||||
|
||||
private String place;
|
||||
|
||||
public String getPoint() {
|
||||
return point;
|
||||
}
|
||||
|
||||
public void setPoint(String point) {
|
||||
this.point = point;
|
||||
}
|
||||
|
||||
public String getBox() {
|
||||
return box;
|
||||
}
|
||||
|
||||
public void setBox(String box) {
|
||||
this.box = box;
|
||||
}
|
||||
|
||||
public String getPlace() {
|
||||
return place;
|
||||
}
|
||||
|
||||
public void setPlace(String place) {
|
||||
this.place = place;
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public boolean isBlank() {
|
||||
return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published
|
||||
* versions are two manifestations of the same research result. It has the following parameters: - license of type
|
||||
* String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be
|
||||
* dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. -
|
||||
* type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
|
||||
* (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - hostedby of
|
||||
* type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance can be
|
||||
* viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and - key corresponds
|
||||
* to hostedby.key - value corresponds to hostedby.value - url of type List<String> list of locations where the instance
|
||||
* is accessible. It corresponds to url of the instance to be dumped - collectedfrom of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been
|
||||
* collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to
|
||||
* collectedfrom.key - value corresponds to collectedfrom.value - publicationdate of type String to store the
|
||||
* publication date of the instance ;// dateofacceptance; - refereed of type String to store information abour tthe
|
||||
* review status of the instance. Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed'. It corresponds to
|
||||
* refereed.classname of the instance to be dumped
|
||||
*/
|
||||
public class Instance implements Serializable {
|
||||
|
||||
private String license;
|
||||
|
||||
private AccessRight accessright;
|
||||
|
||||
private String type;
|
||||
|
||||
private KeyValue hostedby;
|
||||
|
||||
private List<String> url;
|
||||
|
||||
private KeyValue collectedfrom;
|
||||
|
||||
private String publicationdate;// dateofacceptance;
|
||||
|
||||
private String refereed; // peer-review status
|
||||
|
||||
public String getLicense() {
|
||||
return license;
|
||||
}
|
||||
|
||||
public void setLicense(String license) {
|
||||
this.license = license;
|
||||
}
|
||||
|
||||
public AccessRight getAccessright() {
|
||||
return accessright;
|
||||
}
|
||||
|
||||
public void setAccessright(AccessRight accessright) {
|
||||
this.accessright = accessright;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public KeyValue getHostedby() {
|
||||
return hostedby;
|
||||
}
|
||||
|
||||
public void setHostedby(KeyValue hostedby) {
|
||||
this.hostedby = hostedby;
|
||||
}
|
||||
|
||||
public List<String> getUrl() {
|
||||
return url;
|
||||
}
|
||||
|
||||
public void setUrl(List<String> url) {
|
||||
this.url = url;
|
||||
}
|
||||
|
||||
public KeyValue getCollectedfrom() {
|
||||
return collectedfrom;
|
||||
}
|
||||
|
||||
public void setCollectedfrom(KeyValue collectedfrom) {
|
||||
this.collectedfrom = collectedfrom;
|
||||
}
|
||||
|
||||
public String getPublicationdate() {
|
||||
return publicationdate;
|
||||
}
|
||||
|
||||
public void setPublicationdate(String publicationdate) {
|
||||
this.publicationdate = publicationdate;
|
||||
}
|
||||
|
||||
public String getRefereed() {
|
||||
return refereed;
|
||||
}
|
||||
|
||||
public void setRefereed(String refereed) {
|
||||
this.refereed = refereed;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
/**
|
||||
* To represent the information described by a key and a value. It has two parameters: - key to store the key (generally
|
||||
* the OpenAIRE id for some entity) - value to store the value (generally the OpenAIRE name for the key)
|
||||
*/
|
||||
public class KeyValue implements Serializable {
|
||||
|
||||
private String key;
|
||||
|
||||
private String value;
|
||||
|
||||
public String getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
public void setKey(String key) {
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
public String getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public void setValue(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public static KeyValue newInstance(String key, String value) {
|
||||
KeyValue inst = new KeyValue();
|
||||
inst.key = key;
|
||||
inst.value = value;
|
||||
return inst;
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public boolean isBlank() {
|
||||
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To represent the generic persistent identifier. It has two parameters: - id of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the scheme and value of the Persistent Identifier. -
|
||||
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information
|
||||
*/
|
||||
public class Pid implements Serializable {
|
||||
private ControlledField id;
|
||||
private Provenance provenance;
|
||||
|
||||
public ControlledField getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(ControlledField pid) {
|
||||
this.id = pid;
|
||||
}
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public static Pid newInstance(ControlledField pid, Provenance provenance) {
|
||||
Pid p = new Pid();
|
||||
p.id = pid;
|
||||
p.provenance = provenance;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
public static Pid newInstance(ControlledField pid) {
|
||||
Pid p = new Pid();
|
||||
p.id = pid;
|
||||
|
||||
return p;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Describes a project through four String parameters: id (the OpenAIRE id), code, acronym and title. The fields
 * are protected so that subclasses can access them directly.
 */
public class Project implements Serializable {

    protected String id; // OpenAIRE id

    protected String code;

    protected String acronym;

    protected String title;

    /** @return the OpenAIRE id, or {@code null} when not set */
    public String getId() {
        return id;
    }

    /** @param id the OpenAIRE id */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the code, or {@code null} when not set */
    public String getCode() {
        return code;
    }

    /** @param code the code to store */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the acronym, or {@code null} when not set */
    public String getAcronym() {
        return acronym;
    }

    /** @param acronym the acronym to store */
    public void setAcronym(String acronym) {
        this.acronym = acronym;
    }

    /** @return the title, or {@code null} when not set */
    public String getTitle() {
        return title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }
}
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Indicates the process that produced (or provided) the information, and the trust associated to the information.
 * It has two parameters:
 * <ul>
 * <li>provenance - of type String, stores the provenance of the information</li>
 * <li>trust - of type String, stores the trust associated to the information</li>
 * </ul>
 */
public class Provenance implements Serializable {

    private String provenance;

    private String trust;

    /** @return the provenance of the information, or {@code null} when not set */
    public String getProvenance() {
        return provenance;
    }

    /** @param provenance the provenance of the information */
    public void setProvenance(String provenance) {
        this.provenance = provenance;
    }

    /** @return the trust associated to the information, or {@code null} when not set */
    public String getTrust() {
        return trust;
    }

    /** @param trust the trust associated to the information */
    public void setTrust(String trust) {
        this.trust = trust;
    }

    /**
     * Builds a Provenance with both values set.
     *
     * @param provenance the provenance to store
     * @param trust the trust to store
     * @return a new Provenance instance
     */
    public static Provenance newInstance(String provenance, String trust) {
        Provenance p = new Provenance();
        p.provenance = provenance;
        p.trust = trust;
        return p;
    }

    /**
     * Concatenates provenance and trust with no separator; a {@code null} field is rendered as the text "null".
     * The format is kept exactly as it was because callers may depend on it.
     *
     * @return provenance immediately followed by trust
     */
    @Override
    public String toString() {
        return provenance + trust;
    }
}
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
/**
 * To represent the information described by a code and a label. It has two parameters:
 * <ul>
 * <li>code - stores the code (generally the classid of the eu.dnetlib.dhp.schema.oaf.Qualifier element)</li>
 * <li>label - stores the label (generally the classname of the eu.dnetlib.dhp.schema.oaf.Qualifier element)</li>
 * </ul>
 */
public class Qualifier implements Serializable {

    private String code; // the classid in the Qualifier

    private String label; // the classname in the Qualifier

    /** @return the code, or {@code null} when not set */
    public String getCode() {
        return code;
    }

    /** @param code the code to store */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the label, or {@code null} when not set */
    public String getLabel() {
        return label;
    }

    /** @param label the label to store */
    public void setLabel(String label) {
        this.label = label;
    }

    /**
     * Builds a Qualifier with both values set.
     *
     * @param code the code to store
     * @param value the text stored as the label
     * @return a new Qualifier instance
     */
    public static Qualifier newInstance(String code, String value) {
        Qualifier qualifier = new Qualifier();
        qualifier.setCode(code);
        qualifier.setLabel(value);
        return qualifier;
    }
}
|
|
@ -0,0 +1,391 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||
|
||||
/**
|
||||
* To represent the dumped result. It will be extended in the dump for Research Communities - Research
|
||||
* Initiative/Infrastructures. It has the following parameters: - author of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.Author> to describe the authors of a result. For each author in the result
|
||||
* represented in the internal model one author in the external model is produced. - type of type String to represent
|
||||
* the category of the result. Possible values are publication, dataset, software, other. It corresponds to
|
||||
* resulttype.classname of the dumped result - language of type eu.dnetlib.dhp.schema.dump.oaf.Qualifier to store
|
||||
* information about the language of the result. It is dumped as - code corresponds to language.classid - value
|
||||
* corresponds to language.classname - country of type List<eu.dnetlib.dhp.schema.dump.oaf.Country> to store the country
|
||||
* list to which the result is associated. For each country in the result represented in the internal model one country
|
||||
* in the external model is produced - subjects of type List<eu.dnetlib.dhp.schema.dump.oaf.Subject> to store the subjects for
|
||||
* the result. For each subject in the result represented in the internal model one subject in the external model is
|
||||
* produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first
|
||||
* title in the result to be dumped having classid equal to "main title" - subtitle of type String to store the subtitle
|
||||
* of the result. It corresponds to the value of the first title in the result to be dumped having classid equal to
|
||||
* "subtitle" - description of type List<String> to store the description of the result. It corresponds to the list of
|
||||
* description.value in the result represented in the internal model - publicationdate of type String to store the
|
||||
* publication date. It corresponds to dateofacceptance.value in the result represented in the internal model -
|
||||
* publisher of type String to store information about the publisher. It corresponds to publisher.value of the result
|
||||
* represented in the internal model - embargoenddate of type String to store the embargo end date. It corresponds to
|
||||
* embargoenddate.value of the result represented in the internal model - source of type List<String> See definition of
|
||||
* Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal
|
||||
* model - format of type List<String> It corresponds to the list of format.value in the result represented in the
|
||||
* internal model - contributor of type List<String> to represent contributors for this result. It corresponds to the
|
||||
* list of contributor.value in the result represented in the internal model - coverage of type List<String>. It corresponds
|
||||
* to the list of coverage.value in the result represented in the internal model - bestaccessright of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store information about the most open access right associated to the
|
||||
* manifestations of this research results. It corresponds to the same parameter in the result represented in the
|
||||
* internal model - instance of type List<eu.dnetlib.dhp.schema.dump.oaf.Instance> to store all the instances associated
|
||||
* to the result. It corresponds to the same parameter in the result represented in the internal model - container of
|
||||
* type eu.dnetlib.dhp.schema.dump.oaf.Container (only for result of type publication). It corresponds to the parameter
|
||||
* journal of the result represented in the internal model - documentationUrl of type List<String> (only for results of
|
||||
* type software) to store the URLs to the software documentation. It corresponds to the list of documentationUrl.value
|
||||
* of the result represented in the internal model - codeRepositoryUrl of type String (only for results of type
|
||||
* software) to store the URL to the repository with the source code. It corresponds to codeRepositoryUrl.value of the
|
||||
* result represented in the internal model - programmingLanguage of type String (only for results of type software) to
|
||||
* store the programming language. It corresponds to programmingLanguage.classid of the result represented in the
|
||||
* internal model - contactperson of type List<String> (only for results of type other) to store the contact person for
|
||||
* this result. It corresponds to the list of contactperson.value of the result represented in the internal model -
|
||||
* contactgroup of type List<String> (only for results of type other) to store the information for the contact group. It
|
||||
* corresponds to the list of contactgroup.value of the result represented in the internal model - tool of type
|
||||
* List<String> (only for results of type other) to store information about tools useful for the interpretation and/or
|
||||
* re-use of the research product. It corresponds to the list of tool.value in the result represented in the internal
|
||||
* model - size of type String (only for results of type dataset) to store the size of the dataset. It corresponds to
|
||||
* size.value in the result represented in the internal model - version of type String (only for results of type
|
||||
* dataset) to store the version. It corresponds to version.value of the result represented in the internal model -
|
||||
* geolocation of type List<eu.dnetlib.dhp.schema.dump.oaf.GeoLocation> (only for results of type dataset) to store
|
||||
* geolocation information. For each geolocation element in the result represented in the internal model a GeoLocation
|
||||
* in the external model is produced - id of type String to store the OpenAIRE id of the result. It corresponds to the
|
||||
* id of the result represented in the internal model - originalId of type List<String> to store the original ids of the
|
||||
* result. It corresponds to the originalId of the result represented in the internal model - pid of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the persistent identifiers for the result. For each pid
|
||||
* in the results represented in the internal model one pid in the external model is produced. The value correspondence
|
||||
* is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model - value corresponds
|
||||
* to the pid.value of the result represented in the internal model - dateofcollection of type String to store
|
||||
* information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result
|
||||
* represented in the internal model - lastupdatetimestamp of type Long to store the timestamp of the last update of
|
||||
* the record. It corresponds to lastupdatetimestamp of the record represented in the internal model
|
||||
*/
|
||||
public class Result implements Serializable {
|
||||
|
||||
private List<Author> author;
|
||||
|
||||
// resulttype allows subclassing results into publications | datasets | software
|
||||
private String type; // resulttype
|
||||
|
||||
// common fields
|
||||
private Qualifier language;
|
||||
|
||||
private List<Country> country;
|
||||
|
||||
private List<Subject> subjects;
|
||||
|
||||
private String maintitle;
|
||||
|
||||
private String subtitle;
|
||||
|
||||
private List<String> description;
|
||||
|
||||
private String publicationdate; // dateofacceptance;
|
||||
|
||||
private String publisher;
|
||||
|
||||
private String embargoenddate;
|
||||
|
||||
private List<String> source;
|
||||
|
||||
private List<String> format;
|
||||
|
||||
private List<String> contributor;
|
||||
|
||||
private List<String> coverage;
|
||||
|
||||
private AccessRight bestaccessright;
|
||||
|
||||
private List<Instance> instance;
|
||||
|
||||
private Container container;// Journal
|
||||
|
||||
private List<String> documentationUrl; // software
|
||||
|
||||
private String codeRepositoryUrl; // software
|
||||
|
||||
private String programmingLanguage; // software
|
||||
|
||||
private List<String> contactperson; // orp
|
||||
|
||||
private List<String> contactgroup; // orp
|
||||
|
||||
private List<String> tool; // orp
|
||||
|
||||
private String size; // dataset
|
||||
|
||||
private String version; // dataset
|
||||
|
||||
private List<GeoLocation> geolocation; // dataset
|
||||
|
||||
private String id;
|
||||
|
||||
private List<String> originalId;
|
||||
|
||||
private List<ControlledField> pid;
|
||||
|
||||
private String dateofcollection;
|
||||
|
||||
private Long lastupdatetimestamp;
|
||||
|
||||
public Long getLastupdatetimestamp() {
|
||||
return lastupdatetimestamp;
|
||||
}
|
||||
|
||||
public void setLastupdatetimestamp(Long lastupdatetimestamp) {
|
||||
this.lastupdatetimestamp = lastupdatetimestamp;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getOriginalId() {
|
||||
return originalId;
|
||||
}
|
||||
|
||||
public void setOriginalId(List<String> originalId) {
|
||||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
public String getDateofcollection() {
|
||||
return dateofcollection;
|
||||
}
|
||||
|
||||
public void setDateofcollection(String dateofcollection) {
|
||||
this.dateofcollection = dateofcollection;
|
||||
}
|
||||
|
||||
public List<Author> getAuthor() {
|
||||
return author;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public Container getContainer() {
|
||||
return container;
|
||||
}
|
||||
|
||||
public void setContainer(Container container) {
|
||||
this.container = container;
|
||||
}
|
||||
|
||||
public void setAuthor(List<Author> author) {
|
||||
this.author = author;
|
||||
}
|
||||
|
||||
public Qualifier getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public void setLanguage(Qualifier language) {
|
||||
this.language = language;
|
||||
}
|
||||
|
||||
public List<Country> getCountry() {
|
||||
return country;
|
||||
}
|
||||
|
||||
public void setCountry(List<Country> country) {
|
||||
this.country = country;
|
||||
}
|
||||
|
||||
public List<Subject> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
public void setSubjects(List<Subject> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
|
||||
public String getMaintitle() {
|
||||
return maintitle;
|
||||
}
|
||||
|
||||
public void setMaintitle(String maintitle) {
|
||||
this.maintitle = maintitle;
|
||||
}
|
||||
|
||||
public String getSubtitle() {
|
||||
return subtitle;
|
||||
}
|
||||
|
||||
public void setSubtitle(String subtitle) {
|
||||
this.subtitle = subtitle;
|
||||
}
|
||||
|
||||
public List<String> getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public void setDescription(List<String> description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
public String getPublicationdate() {
|
||||
return publicationdate;
|
||||
}
|
||||
|
||||
public void setPublicationdate(String publicationdate) {
|
||||
this.publicationdate = publicationdate;
|
||||
}
|
||||
|
||||
public String getPublisher() {
|
||||
return publisher;
|
||||
}
|
||||
|
||||
public void setPublisher(String publisher) {
|
||||
this.publisher = publisher;
|
||||
}
|
||||
|
||||
public String getEmbargoenddate() {
|
||||
return embargoenddate;
|
||||
}
|
||||
|
||||
public void setEmbargoenddate(String embargoenddate) {
|
||||
this.embargoenddate = embargoenddate;
|
||||
}
|
||||
|
||||
public List<String> getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public void setSource(List<String> source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
public List<String> getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public void setFormat(List<String> format) {
|
||||
this.format = format;
|
||||
}
|
||||
|
||||
public List<String> getContributor() {
|
||||
return contributor;
|
||||
}
|
||||
|
||||
public void setContributor(List<String> contributor) {
|
||||
this.contributor = contributor;
|
||||
}
|
||||
|
||||
public List<String> getCoverage() {
|
||||
return coverage;
|
||||
}
|
||||
|
||||
public void setCoverage(List<String> coverage) {
|
||||
this.coverage = coverage;
|
||||
}
|
||||
|
||||
public AccessRight getBestaccessright() {
|
||||
return bestaccessright;
|
||||
}
|
||||
|
||||
public void setBestaccessright(AccessRight bestaccessright) {
|
||||
this.bestaccessright = bestaccessright;
|
||||
}
|
||||
|
||||
public List<Instance> getInstance() {
|
||||
return instance;
|
||||
}
|
||||
|
||||
public void setInstance(List<Instance> instance) {
|
||||
this.instance = instance;
|
||||
}
|
||||
|
||||
public List<String> getDocumentationUrl() {
|
||||
return documentationUrl;
|
||||
}
|
||||
|
||||
public void setDocumentationUrl(List<String> documentationUrl) {
|
||||
this.documentationUrl = documentationUrl;
|
||||
}
|
||||
|
||||
public String getCodeRepositoryUrl() {
|
||||
return codeRepositoryUrl;
|
||||
}
|
||||
|
||||
public void setCodeRepositoryUrl(String codeRepositoryUrl) {
|
||||
this.codeRepositoryUrl = codeRepositoryUrl;
|
||||
}
|
||||
|
||||
public String getProgrammingLanguage() {
|
||||
return programmingLanguage;
|
||||
}
|
||||
|
||||
public void setProgrammingLanguage(String programmingLanguage) {
|
||||
this.programmingLanguage = programmingLanguage;
|
||||
}
|
||||
|
||||
public List<String> getContactperson() {
|
||||
return contactperson;
|
||||
}
|
||||
|
||||
public void setContactperson(List<String> contactperson) {
|
||||
this.contactperson = contactperson;
|
||||
}
|
||||
|
||||
public List<String> getContactgroup() {
|
||||
return contactgroup;
|
||||
}
|
||||
|
||||
public void setContactgroup(List<String> contactgroup) {
|
||||
this.contactgroup = contactgroup;
|
||||
}
|
||||
|
||||
public List<String> getTool() {
|
||||
return tool;
|
||||
}
|
||||
|
||||
public void setTool(List<String> tool) {
|
||||
this.tool = tool;
|
||||
}
|
||||
|
||||
public String getSize() {
|
||||
return size;
|
||||
}
|
||||
|
||||
public void setSize(String size) {
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
public String getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public void setVersion(String version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
public List<GeoLocation> getGeolocation() {
|
||||
return geolocation;
|
||||
}
|
||||
|
||||
public void setGeolocation(List<GeoLocation> geolocation) {
|
||||
this.geolocation = geolocation;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To represent keywords associated to the result. It has two parameters: - subject of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to describe the subject. It mapped as: - schema it corresponds to
|
||||
* qualifier.classid of the dumped subject - value it corresponds to the subject value - provenance of type
|
||||
* eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo
|
||||
* is not null. In this case: - provenance corresponds to dataInfo.provenanceaction.classname - trust corresponds to
|
||||
* dataInfo.trust
|
||||
*/
|
||||
public class Subject implements Serializable {
|
||||
private ControlledField subject;
|
||||
private Provenance provenance;
|
||||
|
||||
public ControlledField getSubject() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
public void setSubject(ControlledField subject) {
|
||||
this.subject = subject;
|
||||
}
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Result;
|
||||
|
||||
/**
|
||||
* extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Project> to store the list of projects related to the result. The
|
||||
* information is added after the result is mapped to the external model - context of type
|
||||
* List<eu.dnetlib.dhp.schema/dump.oaf.community.Context> to store information about the RC RI related to the result.
|
||||
* For each context in the result represented in the internal model one context in the external model is produced -
|
||||
* collectedfrom of type List<eu.dnetliv.dhp.schema.dump.oaf.KeyValue> to store information about the sources from which
|
||||
* the record has been collected. For each collectedfrom in the result represented in the internal model one
|
||||
* collectedfrom in the external model is produced
|
||||
*/
|
||||
public class CommunityResult extends Result {
|
||||
|
||||
private List<Project> projects;
|
||||
|
||||
private List<Context> context;
|
||||
|
||||
protected List<KeyValue> collectedfrom;
|
||||
|
||||
public List<KeyValue> getCollectedfrom() {
|
||||
return collectedfrom;
|
||||
}
|
||||
|
||||
public void setCollectedfrom(List<KeyValue> collectedfrom) {
|
||||
this.collectedfrom = collectedfrom;
|
||||
}
|
||||
|
||||
public List<Project> getProjects() {
|
||||
return projects;
|
||||
}
|
||||
|
||||
public void setProjects(List<Project> projects) {
|
||||
this.projects = projects;
|
||||
}
|
||||
|
||||
public List<Context> getContext() {
|
||||
return context;
|
||||
}
|
||||
|
||||
public void setContext(List<Context> context) {
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
|
||||
|
||||
/**
|
||||
* Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with
|
||||
* OpenAIRE. It extend eu.dnetlib.dhp.shema.dump.oaf.Qualifier with a parameter provenance of type
|
||||
* List<eu.dnetlib.dhp.schema.dump.oaf.Provenance> to store the provenances of the association between the result and
|
||||
* the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result
|
||||
* to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::"
|
||||
* will be used as value for code - label it corresponds to the label associated to the id. The information id taken
|
||||
* from the profile of the RC/RI - provenance it is set only if the dataInfo associated to the contenxt element of the
|
||||
* result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is
|
||||
* instantiated if the element datainfo.provenanceaction is not null. In this case - provenance corresponds to
|
||||
* dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust
|
||||
*/
|
||||
public class Context extends Qualifier {
|
||||
private List<Provenance> provenance;
|
||||
|
||||
public List<Provenance> getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(List<Provenance> provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
String provenance = new String();
|
||||
this.provenance.forEach(p -> provenance.concat(p.toString()));
|
||||
return Objects.hash(getCode(), getLabel(), provenance);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Stores information about the funder funding the project related to the result. It has the following parameters:
 * <ul>
 * <li>shortName, of type String, stores the funder short name (e.g. AKA)</li>
 * <li>name, of type String, stores the funder name (e.g. Academy of Finland)</li>
 * <li>fundingStream, of type String, stores the funding stream</li>
 * <li>jurisdiction, of type String, stores the jurisdiction of the funder</li>
 * </ul>
 */
public class Funder implements Serializable {

	private String shortName;

	private String name;

	private String fundingStream;

	private String jurisdiction;

	/** @return the funder short name */
	public String getShortName() {
		return shortName;
	}

	/** @param shortName the funder short name */
	public void setShortName(String shortName) {
		this.shortName = shortName;
	}

	/** @return the funder name */
	public String getName() {
		return name;
	}

	/** @param name the funder name */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the funding stream */
	public String getFundingStream() {
		return fundingStream;
	}

	/** @param fundingStream the funding stream */
	public void setFundingStream(String fundingStream) {
		this.fundingStream = fundingStream;
	}

	/** @return the jurisdiction of the funder */
	public String getJurisdiction() {
		return jurisdiction;
	}

	/** @param jurisdiction the jurisdiction of the funder */
	public void setJurisdiction(String jurisdiction) {
		this.jurisdiction = jurisdiction;
	}

}
|
|
@ -0,0 +1,88 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.community;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
|
||||
/**
|
||||
* To store information about the project related to the result. This information is not directly mapped from the result
|
||||
* represented in the internal model because it is not there. The mapped result will be enriched with project
|
||||
* information derived by relation between results and projects. Project class has the following parameters: - id of
|
||||
* type String to store the OpenAIRE id for the Project - code of type String to store the grant agreement - acronym of
|
||||
* type String to store the acronym for the project - title of type String to store the title of the project - funder of
|
||||
* type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information about the funder funding the project -
|
||||
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store information about the. provenance of the
|
||||
* association between the result and the project
|
||||
*/
|
||||
public class Project implements Serializable {
|
||||
|
||||
private String id;// OpenAIRE id
|
||||
private String code;
|
||||
|
||||
private String acronym;
|
||||
|
||||
private String title;
|
||||
|
||||
private Funder funder;
|
||||
|
||||
private Provenance provenance;
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getCode() {
|
||||
return code;
|
||||
}
|
||||
|
||||
public void setCode(String code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
public String getAcronym() {
|
||||
return acronym;
|
||||
}
|
||||
|
||||
public void setAcronym(String acronym) {
|
||||
this.acronym = acronym;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public Funder getFunder() {
|
||||
return funder;
|
||||
}
|
||||
|
||||
public void setFunder(Funder funders) {
|
||||
this.funder = funders;
|
||||
}
|
||||
|
||||
public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
|
||||
Project project = new Project();
|
||||
project.setAcronym(acronym);
|
||||
project.setCode(code);
|
||||
project.setFunder(funder);
|
||||
project.setId(id);
|
||||
project.setTitle(title);
|
||||
return project;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * String constants shared by the graph dump model: relation names, entity type names and the values used for
 * context identifiers.
 */
public class Constants implements Serializable {

	// Relation names.
	// collectedFrom goes together with isProvidedBy -> taken from ModelSupport
	public static final String HOSTED_BY = "isHostedBy";
	public static final String HOSTS = "hosts";

	// Entity type names.
	// for community/result relations isRelatedTo is used
	public static final String RESULT_ENTITY = "result";
	public static final String DATASOURCE_ENTITY = "datasource";
	public static final String CONTEXT_ENTITY = "context";

	// Values for context identifiers.
	public static final String CONTEXT_ID = "60";
	public static final String CONTEXT_NS_PREFIX = "context____";

}
|
|
@ -0,0 +1,316 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Container;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
|
||||
/**
|
||||
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
|
||||
* id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource
|
||||
* represented in the internal model - originalId of type List<String> to store the list of original ids associated to
|
||||
* the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The
|
||||
* null values are filtered out - pid of type List<eu.dnetlib.shp.schema.dump.oaf.ControlledField> to store the
|
||||
* persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid
|
||||
* in the external model is produced as : - schema corresponds to pid.qualifier.classid of the datasource represented in
|
||||
* the internal model - value corresponds to pid.value of the datasource represented in the internal model -
|
||||
* datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g.
|
||||
* pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It
|
||||
* corresponds to datasourcetype of the datasource represented in the internal model and : - code corresponds to
|
||||
* datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to
|
||||
* store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to).
|
||||
* It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname
|
||||
* of type Sgtring to store the official name of the datasource. It correspond to officialname.value of the datasource
|
||||
* represented in the internal model - englishname of type String to store the English name of the datasource. It
|
||||
* corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to
|
||||
* store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in
|
||||
* the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to
|
||||
* logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the data
|
||||
* of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the
|
||||
* datasource represented in the internal model - description of type String to store the description for the
|
||||
* datasource. It corresponds to description.value of the datasource represented in the internal model
|
||||
*/
|
||||
public class Datasource implements Serializable {
|
||||
|
||||
private String id; // string
|
||||
|
||||
private List<String> originalId; // list string
|
||||
|
||||
private List<ControlledField> pid; // list<String>
|
||||
|
||||
private ControlledField datasourcetype; // value
|
||||
|
||||
private String openairecompatibility; // value
|
||||
|
||||
private String officialname; // string
|
||||
|
||||
private String englishname; // string
|
||||
|
||||
private String websiteurl; // string
|
||||
|
||||
private String logourl; // string
|
||||
|
||||
private String dateofvalidation; // string
|
||||
|
||||
private String description; // description
|
||||
|
||||
private List<String> subjects; // List<String>
|
||||
|
||||
// opendoar specific fields (od*)
|
||||
|
||||
private List<String> languages; // odlanguages List<String>
|
||||
|
||||
private List<String> contenttypes; // odcontent types List<String>
|
||||
|
||||
// re3data fields
|
||||
private String releasestartdate; // string
|
||||
|
||||
private String releaseenddate; // string
|
||||
|
||||
private String missionstatementurl; // string
|
||||
|
||||
// {open, restricted or closed}
|
||||
private String accessrights; // databaseaccesstype string
|
||||
|
||||
// {open, restricted or closed}
|
||||
private String uploadrights; // datauploadtype string
|
||||
|
||||
// {feeRequired, registration, other}
|
||||
private String databaseaccessrestriction; // string
|
||||
|
||||
// {feeRequired, registration, other}
|
||||
private String datauploadrestriction; // string
|
||||
|
||||
private Boolean versioning; // boolean
|
||||
|
||||
private String citationguidelineurl; // string
|
||||
|
||||
// {yes, no, uknown}
|
||||
|
||||
private String pidsystems; // string
|
||||
|
||||
private String certificates; // string
|
||||
|
||||
private List<Object> policies; //
|
||||
|
||||
private Container journal; // issn etc del Journal
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getOriginalId() {
|
||||
return originalId;
|
||||
}
|
||||
|
||||
public void setOriginalId(List<String> originalId) {
|
||||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
public ControlledField getDatasourcetype() {
|
||||
return datasourcetype;
|
||||
}
|
||||
|
||||
public void setDatasourcetype(ControlledField datasourcetype) {
|
||||
this.datasourcetype = datasourcetype;
|
||||
}
|
||||
|
||||
public String getOpenairecompatibility() {
|
||||
return openairecompatibility;
|
||||
}
|
||||
|
||||
public void setOpenairecompatibility(String openairecompatibility) {
|
||||
this.openairecompatibility = openairecompatibility;
|
||||
}
|
||||
|
||||
public String getOfficialname() {
|
||||
return officialname;
|
||||
}
|
||||
|
||||
public void setOfficialname(String officialname) {
|
||||
this.officialname = officialname;
|
||||
}
|
||||
|
||||
public String getEnglishname() {
|
||||
return englishname;
|
||||
}
|
||||
|
||||
public void setEnglishname(String englishname) {
|
||||
this.englishname = englishname;
|
||||
}
|
||||
|
||||
public String getWebsiteurl() {
|
||||
return websiteurl;
|
||||
}
|
||||
|
||||
public void setWebsiteurl(String websiteurl) {
|
||||
this.websiteurl = websiteurl;
|
||||
}
|
||||
|
||||
public String getLogourl() {
|
||||
return logourl;
|
||||
}
|
||||
|
||||
public void setLogourl(String logourl) {
|
||||
this.logourl = logourl;
|
||||
}
|
||||
|
||||
public String getDateofvalidation() {
|
||||
return dateofvalidation;
|
||||
}
|
||||
|
||||
public void setDateofvalidation(String dateofvalidation) {
|
||||
this.dateofvalidation = dateofvalidation;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public void setDescription(String description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
public List<String> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
public void setSubjects(List<String> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
|
||||
public List<String> getLanguages() {
|
||||
return languages;
|
||||
}
|
||||
|
||||
public void setLanguages(List<String> languages) {
|
||||
this.languages = languages;
|
||||
}
|
||||
|
||||
public List<String> getContenttypes() {
|
||||
return contenttypes;
|
||||
}
|
||||
|
||||
public void setContenttypes(List<String> contenttypes) {
|
||||
this.contenttypes = contenttypes;
|
||||
}
|
||||
|
||||
public String getReleasestartdate() {
|
||||
return releasestartdate;
|
||||
}
|
||||
|
||||
public void setReleasestartdate(String releasestartdate) {
|
||||
this.releasestartdate = releasestartdate;
|
||||
}
|
||||
|
||||
public String getReleaseenddate() {
|
||||
return releaseenddate;
|
||||
}
|
||||
|
||||
public void setReleaseenddate(String releaseenddate) {
|
||||
this.releaseenddate = releaseenddate;
|
||||
}
|
||||
|
||||
public String getMissionstatementurl() {
|
||||
return missionstatementurl;
|
||||
}
|
||||
|
||||
public void setMissionstatementurl(String missionstatementurl) {
|
||||
this.missionstatementurl = missionstatementurl;
|
||||
}
|
||||
|
||||
public String getAccessrights() {
|
||||
return accessrights;
|
||||
}
|
||||
|
||||
public void setAccessrights(String accessrights) {
|
||||
this.accessrights = accessrights;
|
||||
}
|
||||
|
||||
public String getUploadrights() {
|
||||
return uploadrights;
|
||||
}
|
||||
|
||||
public void setUploadrights(String uploadrights) {
|
||||
this.uploadrights = uploadrights;
|
||||
}
|
||||
|
||||
public String getDatabaseaccessrestriction() {
|
||||
return databaseaccessrestriction;
|
||||
}
|
||||
|
||||
public void setDatabaseaccessrestriction(String databaseaccessrestriction) {
|
||||
this.databaseaccessrestriction = databaseaccessrestriction;
|
||||
}
|
||||
|
||||
public String getDatauploadrestriction() {
|
||||
return datauploadrestriction;
|
||||
}
|
||||
|
||||
public void setDatauploadrestriction(String datauploadrestriction) {
|
||||
this.datauploadrestriction = datauploadrestriction;
|
||||
}
|
||||
|
||||
public Boolean getVersioning() {
|
||||
return versioning;
|
||||
}
|
||||
|
||||
public void setVersioning(Boolean versioning) {
|
||||
this.versioning = versioning;
|
||||
}
|
||||
|
||||
public String getCitationguidelineurl() {
|
||||
return citationguidelineurl;
|
||||
}
|
||||
|
||||
public void setCitationguidelineurl(String citationguidelineurl) {
|
||||
this.citationguidelineurl = citationguidelineurl;
|
||||
}
|
||||
|
||||
public String getPidsystems() {
|
||||
return pidsystems;
|
||||
}
|
||||
|
||||
public void setPidsystems(String pidsystems) {
|
||||
this.pidsystems = pidsystems;
|
||||
}
|
||||
|
||||
public String getCertificates() {
|
||||
return certificates;
|
||||
}
|
||||
|
||||
public void setCertificates(String certificates) {
|
||||
this.certificates = certificates;
|
||||
}
|
||||
|
||||
public List<Object> getPolicies() {
|
||||
return policies;
|
||||
}
|
||||
|
||||
public void setPolicies(List<Object> policiesr3) {
|
||||
this.policies = policiesr3;
|
||||
}
|
||||
|
||||
public Container getJournal() {
|
||||
return journal;
|
||||
}
|
||||
|
||||
public void setJournal(Container journal) {
|
||||
this.journal = journal;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* To store information about the funder funding the project related to the result. It has the following parameters:
|
||||
* - private String shortName to store the short name of the funder (e.g. AKA)
|
||||
* - private String name to store information about the name of the funder (e.g. Akademy of Finland)
|
||||
* - private Fundings funding_stream to store the fundingstream
|
||||
* - private String jurisdiction to store information about the jurisdiction of the funder
|
||||
*/
|
||||
public class Funder implements Serializable {
|
||||
|
||||
private String shortName;
|
||||
|
||||
private String name;
|
||||
|
||||
private Fundings funding_stream;
|
||||
|
||||
private String jurisdiction;
|
||||
|
||||
public String getShortName() {
|
||||
return shortName;
|
||||
}
|
||||
|
||||
public void setShortName(String shortName) {
|
||||
this.shortName = shortName;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getJurisdiction() {
|
||||
return jurisdiction;
|
||||
}
|
||||
|
||||
public void setJurisdiction(String jurisdiction) {
|
||||
this.jurisdiction = jurisdiction;
|
||||
}
|
||||
|
||||
public Fundings getFunding_stream() {
|
||||
return funding_stream;
|
||||
}
|
||||
|
||||
public void setFunding_stream(Fundings funding_stream) {
|
||||
this.funding_stream = funding_stream;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Stores information about the funding stream. It has two parameters:
 * <ul>
 * <li>private String id, stores the id of the funding stream. The id is created by appending the shortname of the
 * funder to the name of each level in the xml representing the funding stream. For example: if the funder is the
 * European Commission, the funding level 0 name is FP7, the funding level 1 name is SP3 and the funding level 2 name
 * is PEOPLE then the id will be: EC::FP7::SP3::PEOPLE</li>
 * <li>private String description, describes the funding stream. It is created by concatenating the description of
 * each funding level, so for the example above the description would be: SEVENTH FRAMEWORK PROGRAMME - SP3-People -
 * Marie-Curie Actions</li>
 * </ul>
 */
public class Fundings implements Serializable {

	private String id;

	private String description;

	/** @return the id of the funding stream */
	public String getId() {
		return id;
	}

	/** @param id the id of the funding stream */
	public void setId(String id) {
		this.id = id;
	}

	/** @return the description of the funding stream */
	public String getDescription() {
		return description;
	}

	/** @param description the description of the funding stream */
	public void setDescription(String description) {
		this.description = description;
	}

}
|
|
@ -0,0 +1,56 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
 * Describes the funded amount.
 * <ul>
 * <li>{@code currency}: the currency of the fund</li>
 * <li>{@code totalcost}: the total cost of the project</li>
 * <li>{@code fundedamount}: the amount funded by the funder</li>
 * </ul>
 */
public class Granted implements Serializable {

    private String currency;
    private float totalcost;
    private float fundedamount;

    /** @return the currency of the fund */
    public String getCurrency() {
        return this.currency;
    }

    /** @param currency the currency of the fund */
    public void setCurrency(String currency) {
        this.currency = currency;
    }

    /** @return the total cost of the project */
    public float getTotalcost() {
        return this.totalcost;
    }

    /** @param totalcost the total cost of the project */
    public void setTotalcost(float totalcost) {
        this.totalcost = totalcost;
    }

    /** @return the amount funded by the funder */
    public float getFundedamount() {
        return this.fundedamount;
    }

    /** @param fundedamount the amount funded by the funder */
    public void setFundedamount(float fundedamount) {
        this.fundedamount = fundedamount;
    }

    /**
     * Creates an instance with all three values set.
     *
     * @param currency the currency of the fund
     * @param totalcost the total cost of the project
     * @param fundedamount the amount funded by the funder
     * @return the new instance
     */
    public static Granted newInstance(String currency, float totalcost, float fundedamount) {
        final Granted result = newInstance(currency, fundedamount);
        result.setTotalcost(totalcost);
        return result;
    }

    /**
     * Creates an instance for which only currency and funded amount are set; the total cost keeps the default
     * value of a float field.
     *
     * @param currency the currency of the fund
     * @param fundedamount the amount funded by the funder
     * @return the new instance
     */
    public static Granted newInstance(String currency, float fundedamount) {
        final Granted result = new Granted();
        result.setCurrency(currency);
        result.setFundedamount(fundedamount);
        return result;
    }
}
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Generic node taking part in a relation.
 * <ul>
 * <li>{@code id}: the openaire id of the entity in the relation</li>
 * <li>{@code type}: the type of the entity in the relation</li>
 * </ul>
 * Given a generic relation between a Result R and a Project P, the node representing R has the id of R as id and
 * result as type, while the node representing P has the id of the project as id and project as type.
 */
public class Node implements Serializable {

    private String id;
    private String type;

    /** @return the openaire id of the entity */
    public String getId() {
        return this.id;
    }

    /** @param id the openaire id of the entity */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the type of the entity */
    public String getType() {
        return this.type;
    }

    /** @param type the type of the entity */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * Creates a node with the given id and type.
     *
     * @param id the openaire id of the entity
     * @param type the type of the entity
     * @return the new node
     */
    public static Node newInstance(String id, String type) {
        final Node created = new Node();
        created.setId(id);
        created.setType(type);
        return created;
    }
}
|
|
@ -0,0 +1,88 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Country;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
|
||||
|
||||
/**
|
||||
* To represent the generic organizaiton. It has the following parameters:
|
||||
* - private String legalshortname to store the legalshortname of the organizaiton
|
||||
* - private String legalname to store the legal name of the organization
|
||||
* - private String websiteurl to store the websiteurl of the organization
|
||||
* - private List<String> alternativenames to store the alternative names of the organization
|
||||
* - private Qualifier country to store the country of the organization
|
||||
* - private String id to store the id of the organization
|
||||
* - private List<ControlledField> pid to store the list of pids for the organization
|
||||
*/
|
||||
public class Organization implements Serializable {
|
||||
private String legalshortname;
|
||||
private String legalname;
|
||||
private String websiteurl;
|
||||
private List<String> alternativenames;
|
||||
private Qualifier country;
|
||||
private String id;
|
||||
private List<ControlledField> pid;
|
||||
|
||||
public String getLegalshortname() {
|
||||
return legalshortname;
|
||||
}
|
||||
|
||||
public void setLegalshortname(String legalshortname) {
|
||||
this.legalshortname = legalshortname;
|
||||
}
|
||||
|
||||
public String getLegalname() {
|
||||
return legalname;
|
||||
}
|
||||
|
||||
public void setLegalname(String legalname) {
|
||||
this.legalname = legalname;
|
||||
}
|
||||
|
||||
public String getWebsiteurl() {
|
||||
return websiteurl;
|
||||
}
|
||||
|
||||
public void setWebsiteurl(String websiteurl) {
|
||||
this.websiteurl = websiteurl;
|
||||
}
|
||||
|
||||
public List<String> getAlternativenames() {
|
||||
return alternativenames;
|
||||
}
|
||||
|
||||
public void setAlternativenames(List<String> alternativenames) {
|
||||
this.alternativenames = alternativenames;
|
||||
}
|
||||
|
||||
public Qualifier getCountry() {
|
||||
return country;
|
||||
}
|
||||
|
||||
public void setCountry(Qualifier country) {
|
||||
this.country = country;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<ControlledField> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<ControlledField> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Information about the ec programme of a project.
 * <ul>
 * <li>{@code code}: the code of the programme</li>
 * <li>{@code description}: the description of the programme</li>
 * </ul>
 */
public class Programme implements Serializable {

    private String code;
    private String description;

    /** @return the code of the programme */
    public String getCode() {
        return this.code;
    }

    /** @param code the code of the programme */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the description of the programme */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description of the programme */
    public void setDescription(String description) {
        this.description = description;
    }

    /**
     * Creates a programme with the given code and description.
     *
     * @param code the code of the programme
     * @param description the description of the programme
     * @return the new instance
     */
    public static Programme newInstance(String code, String description) {
        final Programme programme = new Programme();
        programme.setCode(code);
        programme.setDescription(description);
        return programme;
    }
}
|
|
@ -0,0 +1,195 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
|
||||
|
||||
/**
|
||||
* This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump
|
||||
* of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and
|
||||
* Projects but we put the information about the Funder within the Project representation. We also removed the
|
||||
* collected from element from the Project. No relation between the Project and the Datasource entity from which it is
|
||||
* collected will be created. We will never create relations between Project and Datasource. In case some relation will
|
||||
* be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project,
|
||||
* project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to
|
||||
* 0. It has the following parameters:
|
||||
* - private String id to store the id of the project (OpenAIRE id)
|
||||
* - private String websiteurl to store the websiteurl of the project
|
||||
* - private String code to store the grant agreement of the project
|
||||
* - private String acronym to store the acronym of the project
|
||||
* - private String title to store the tile of the project
|
||||
* - private String startdate to store the start date
|
||||
* - private String enddate to store the end date
|
||||
* - private String callidentifier to store the call indentifier
|
||||
* - private String keywords to store the keywords
|
||||
* - private boolean openaccessmandateforpublications to store if the project must accomplish to the open access mandate
|
||||
* for publications. This value will be set to true if one of the field in the project represented in the internal model
|
||||
* is set to true
|
||||
* - private boolean openaccessmandatefordataset to store if the project must accomplish to the open access mandate for
|
||||
* dataset. It is set to the value in the corresponding filed of the project represented in the internal model
|
||||
* - private List<String> subject to store the list of subjects of the project
|
||||
* - private List<Funder> funding to store the list of funder of the project
|
||||
* - private String summary to store the summary of the project
|
||||
* - private Granted granted to store the granted amount
|
||||
* - private List<Programme> programme to store the list of programmes the project is related to
|
||||
*/
|
||||
|
||||
public class Project implements Serializable {
|
||||
private String id;
|
||||
|
||||
private String websiteurl;
|
||||
private String code;
|
||||
private String acronym;
|
||||
private String title;
|
||||
private String startdate;
|
||||
|
||||
private String enddate;
|
||||
|
||||
private String callidentifier;
|
||||
|
||||
private String keywords;
|
||||
|
||||
private boolean openaccessmandateforpublications;
|
||||
|
||||
private boolean openaccessmandatefordataset;
|
||||
private List<String> subject;
|
||||
|
||||
private List<Funder> funding;
|
||||
|
||||
private String summary;
|
||||
|
||||
private Granted granted;
|
||||
|
||||
private List<Programme> programme;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getWebsiteurl() {
|
||||
return websiteurl;
|
||||
}
|
||||
|
||||
public void setWebsiteurl(String websiteurl) {
|
||||
this.websiteurl = websiteurl;
|
||||
}
|
||||
|
||||
public String getCode() {
|
||||
return code;
|
||||
}
|
||||
|
||||
public void setCode(String code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
public String getAcronym() {
|
||||
return acronym;
|
||||
}
|
||||
|
||||
public void setAcronym(String acronym) {
|
||||
this.acronym = acronym;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public String getStartdate() {
|
||||
return startdate;
|
||||
}
|
||||
|
||||
public void setStartdate(String startdate) {
|
||||
this.startdate = startdate;
|
||||
}
|
||||
|
||||
public String getEnddate() {
|
||||
return enddate;
|
||||
}
|
||||
|
||||
public void setEnddate(String enddate) {
|
||||
this.enddate = enddate;
|
||||
}
|
||||
|
||||
public String getCallidentifier() {
|
||||
return callidentifier;
|
||||
}
|
||||
|
||||
public void setCallidentifier(String callidentifier) {
|
||||
this.callidentifier = callidentifier;
|
||||
}
|
||||
|
||||
public String getKeywords() {
|
||||
return keywords;
|
||||
}
|
||||
|
||||
public void setKeywords(String keywords) {
|
||||
this.keywords = keywords;
|
||||
}
|
||||
|
||||
public boolean isOpenaccessmandateforpublications() {
|
||||
return openaccessmandateforpublications;
|
||||
}
|
||||
|
||||
public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) {
|
||||
this.openaccessmandateforpublications = openaccessmandateforpublications;
|
||||
}
|
||||
|
||||
public boolean isOpenaccessmandatefordataset() {
|
||||
return openaccessmandatefordataset;
|
||||
}
|
||||
|
||||
public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) {
|
||||
this.openaccessmandatefordataset = openaccessmandatefordataset;
|
||||
}
|
||||
|
||||
public List<String> getSubject() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
public void setSubject(List<String> subject) {
|
||||
this.subject = subject;
|
||||
}
|
||||
|
||||
public List<Funder> getFunding() {
|
||||
return funding;
|
||||
}
|
||||
|
||||
public void setFunding(List<Funder> funding) {
|
||||
this.funding = funding;
|
||||
}
|
||||
|
||||
public String getSummary() {
|
||||
return summary;
|
||||
}
|
||||
|
||||
public void setSummary(String summary) {
|
||||
this.summary = summary;
|
||||
}
|
||||
|
||||
public Granted getGranted() {
|
||||
return granted;
|
||||
}
|
||||
|
||||
public void setGranted(Granted granted) {
|
||||
this.granted = granted;
|
||||
}
|
||||
|
||||
public List<Programme> getProgramme() {
|
||||
return programme;
|
||||
}
|
||||
|
||||
public void setProgramme(List<Programme> programme) {
|
||||
this.programme = programme;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Semantics of the generic relation between two entities.
 * <ul>
 * <li>{@code name}: the semantics of the relation (i.e. isAuthorInstitutionOf). It corresponds to the relclass
 * parameter of the relation represented in the internal model</li>
 * <li>{@code type}: the type of the relation (i.e. affiliation). It corresponds to the subreltype parameter of the
 * relation represented in the internal model</li>
 * </ul>
 */
public class RelType implements Serializable {

    /** relclass */
    private String name;

    /** subreltype */
    private String type;

    /** @return the semantics of the relation (relclass) */
    public String getName() {
        return this.name;
    }

    /** @param name the semantics of the relation (relclass) */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the type of the relation (subreltype) */
    public String getType() {
        return this.type;
    }

    /** @param type the type of the relation (subreltype) */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * Creates a relation type with the given name and type.
     *
     * @param name the semantics of the relation (relclass)
     * @param type the type of the relation (subreltype)
     * @return the new instance
     */
    public static RelType newInstance(String name, String type) {
        final RelType created = new RelType();
        created.setName(name);
        created.setType(type);
        return created;
    }
}
|
|
@ -0,0 +1,68 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
|
||||
|
||||
/**
|
||||
* To represent the gereric relation between two entities. It has the following parameters:
|
||||
* - private Node source to represent the entity source of the relation
|
||||
* - private Node target to represent the entity target of the relation
|
||||
* - private RelType reltype to represent the semantics of the relation
|
||||
* - private Provenance provenance to represent the provenance of the relation
|
||||
*/
|
||||
public class Relation implements Serializable {
|
||||
private Node source;
|
||||
private Node target;
|
||||
private RelType reltype;
|
||||
private Provenance provenance;
|
||||
|
||||
public Node getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public void setSource(Node source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
public Node getTarget() {
|
||||
return target;
|
||||
}
|
||||
|
||||
public void setTarget(Node target) {
|
||||
this.target = target;
|
||||
}
|
||||
|
||||
public RelType getReltype() {
|
||||
return reltype;
|
||||
}
|
||||
|
||||
public void setReltype(RelType reltype) {
|
||||
this.reltype = reltype;
|
||||
}
|
||||
|
||||
public Provenance getProvenance() {
|
||||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(Provenance provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName());
|
||||
}
|
||||
|
||||
public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) {
|
||||
Relation relation = new Relation();
|
||||
relation.source = source;
|
||||
relation.target = target;
|
||||
relation.reltype = reltype;
|
||||
relation.provenance = provenance;
|
||||
return relation;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* To represent RC entities. It extends eu.dnetlib.dhp.dump.oaf.grap.ResearchInitiative by adding the parameter subject
|
||||
* to store the list of subjects related to the community
|
||||
*/
|
||||
public class ResearchCommunity extends ResearchInitiative {
|
||||
private List<String> subject;
|
||||
|
||||
public List<String> getSubject() {
|
||||
return subject;
|
||||
}
|
||||
|
||||
public void setSubject(List<String> subject) {
|
||||
this.subject = subject;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.dump.oaf.graph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Entity of type RC/RI. Its parameters are mostly derived from the profile:
 * <ul>
 * <li>{@code id}: the openaire id of the entity. It has 00 as code and is created as
 * 00|context_____::md5(originalId)</li>
 * <li>{@code originalId}: the id of the context as provided in the profile (i.e. mes)</li>
 * <li>{@code name}: the name of the context (taken from the label attribute in the context definition)</li>
 * <li>{@code type}: the type of the context (i.e.: research initiative or research community)</li>
 * <li>{@code description}: the description of the context as given in the profile</li>
 * <li>{@code zenodo_community}: the zenodo community associated to the context (main zenodo community)</li>
 * </ul>
 */
public class ResearchInitiative implements Serializable {

    /** openaireId */
    private String id;

    /** context id */
    private String originalId;

    /** context name */
    private String name;

    /** context type: research initiative or research community */
    private String type;

    private String description;

    private String zenodo_community;

    /** @return the zenodo community associated to the context */
    public String getZenodo_community() {
        return this.zenodo_community;
    }

    /** @param zenodo_community the zenodo community associated to the context */
    public void setZenodo_community(String zenodo_community) {
        this.zenodo_community = zenodo_community;
    }

    /** @return the type of the context */
    public String getType() {
        return this.type;
    }

    /** @param type the type of the context */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the openaire id of the entity */
    public String getId() {
        return this.id;
    }

    /** @param id the openaire id of the entity */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the name of the context */
    public String getName() {
        return this.name;
    }

    /** @param label the name of the context */
    public void setName(String label) {
        this.name = label;
    }

    /** @return the id of the context as provided in the profile */
    public String getOriginalId() {
        return this.originalId;
    }

    /** @param originalId the id of the context as provided in the profile */
    public void setOriginalId(String originalId) {
        this.originalId = originalId;
    }

    /** @return the description of the context */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description of the context */
    public void setDescription(String description) {
        this.description = description;
    }
}
|
|
@ -1,30 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.scholexplorer;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class DLIRelation extends Relation {
|
||||
|
||||
private String dateOfCollection;
|
||||
|
||||
private List<KeyValue> collectedFrom;
|
||||
|
||||
public List<KeyValue> getCollectedFrom() {
|
||||
return collectedFrom;
|
||||
}
|
||||
|
||||
public void setCollectedFrom(List<KeyValue> collectedFrom) {
|
||||
this.collectedFrom = collectedFrom;
|
||||
}
|
||||
|
||||
public String getDateOfCollection() {
|
||||
return dateOfCollection;
|
||||
}
|
||||
|
||||
public void setDateOfCollection(String dateOfCollection) {
|
||||
this.dateOfCollection = dateOfCollection;
|
||||
}
|
||||
}
|
|
@ -2,10 +2,8 @@
|
|||
package eu.dnetlib.dhp.schema.scholexplorer;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
|
@ -78,6 +76,25 @@ public class DLIUnknown extends Oaf implements Serializable {
|
|||
if ("complete".equalsIgnoreCase(p.completionStatus))
|
||||
completionStatus = "complete";
|
||||
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
|
||||
if (StringUtils.isEmpty(id) && StringUtils.isNoneEmpty(p.getId()))
|
||||
id = p.getId();
|
||||
if (StringUtils.isEmpty(dateofcollection) && StringUtils.isNoneEmpty(p.getDateofcollection()))
|
||||
dateofcollection = p.getDateofcollection();
|
||||
|
||||
// When this record has no transformation date, take the one of the merged record. The value has to land in
// dateoftransformation: writing it to dateofcollection would overwrite the collection date handled just above.
if (StringUtils.isEmpty(dateoftransformation) && StringUtils.isNoneEmpty(p.getDateoftransformation()))
    dateoftransformation = p.getDateoftransformation();
|
||||
pid = mergeLists(pid, p.getPid());
|
||||
}
|
||||
|
||||
protected <T> List<T> mergeLists(final List<T>... lists) {
|
||||
|
||||
return Arrays
|
||||
.stream(lists)
|
||||
.filter(Objects::nonNull)
|
||||
.flatMap(List::stream)
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private List<ProvenaceInfo> mergeProvenance(
|
||||
|
|
|
@ -31,6 +31,10 @@
|
|||
<artifactId>elasticsearch-hadoop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Pair of string parameters, {@code value} and {@code otherValue}, attached to a condition.
 */
public class ConditionParams implements Serializable {

    /** Version id used by Java serialization. */
    private static final long serialVersionUID = 2719901844537516110L;

    private String value;
    private String otherValue;

    /** @return the {@code value} parameter */
    public String getValue() {
        return this.value;
    }

    /** @param value the {@code value} parameter */
    public void setValue(final String value) {
        this.value = value;
    }

    /** @return the {@code otherValue} parameter */
    public String getOtherValue() {
        return this.otherValue;
    }

    /** @param otherValue the {@code otherValue} parameter */
    public void setOtherValue(final String otherValue) {
        this.otherValue = otherValue;
    }
}
|
|
@ -2,7 +2,6 @@
|
|||
package eu.dnetlib.dhp.broker.model;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
@ -19,16 +18,12 @@ public class EventFactory {
|
|||
|
||||
private final static String PRODUCER_ID = "OpenAIRE";
|
||||
|
||||
private static final int TTH_DAYS = 365;
|
||||
|
||||
private final static String[] DATE_PATTERNS = {
|
||||
"yyyy-MM-dd"
|
||||
};
|
||||
|
||||
public static Event newBrokerEvent(final UpdateInfo<?> updateInfo) {
|
||||
|
||||
final long now = new Date().getTime();
|
||||
|
||||
final Event res = new Event();
|
||||
|
||||
final MappedFields map = createMapFromResult(updateInfo);
|
||||
|
@ -44,8 +39,8 @@ public class EventFactory {
|
|||
res.setPayload(updateInfo.asBrokerPayload().toJSON());
|
||||
res.setMap(map);
|
||||
res.setTopic(updateInfo.getTopicPath());
|
||||
res.setCreationDate(now);
|
||||
res.setExpiryDate(calculateExpiryDate(now));
|
||||
res.setCreationDate(0l);
|
||||
res.setExpiryDate(Long.MAX_VALUE);
|
||||
res.setInstantMessage(false);
|
||||
|
||||
return res;
|
||||
|
@ -96,7 +91,9 @@ public class EventFactory {
|
|||
return map;
|
||||
}
|
||||
|
||||
private static String calculateEventId(final String topic, final String dsId, final String publicationId,
|
||||
private static String calculateEventId(final String topic,
|
||||
final String dsId,
|
||||
final String publicationId,
|
||||
final String value) {
|
||||
return "event-"
|
||||
+ DigestUtils.md5Hex(topic).substring(0, 4) + "-"
|
||||
|
@ -105,10 +102,6 @@ public class EventFactory {
|
|||
+ DigestUtils.md5Hex(value).substring(0, 5);
|
||||
}
|
||||
|
||||
private static long calculateExpiryDate(final long now) {
|
||||
return now + TTH_DAYS * 24 * 60 * 60 * 1000;
|
||||
}
|
||||
|
||||
private static long parseDateTolong(final String date) {
|
||||
if (StringUtils.isBlank(date)) {
|
||||
return -1;
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class MapCondition implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = -7137490975452466813L;
|
||||
|
||||
private String field;
|
||||
private List<ConditionParams> listParams = new ArrayList<>();
|
||||
|
||||
public String getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
public void setField(final String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
public List<ConditionParams> getListParams() {
|
||||
return listParams;
|
||||
}
|
||||
|
||||
public void setListParams(final List<ConditionParams> listParams) {
|
||||
this.listParams = listParams;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
public class Notification implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = -1770420972526995727L;
|
||||
|
||||
private String notificationId;
|
||||
|
||||
private String subscriptionId;
|
||||
|
||||
private String producerId;
|
||||
|
||||
private String eventId;
|
||||
|
||||
private String topic;
|
||||
|
||||
private Long date;
|
||||
|
||||
private String payload;
|
||||
|
||||
private MappedFields map;
|
||||
|
||||
public String getNotificationId() {
|
||||
return notificationId;
|
||||
}
|
||||
|
||||
public void setNotificationId(final String notificationId) {
|
||||
this.notificationId = notificationId;
|
||||
}
|
||||
|
||||
public String getSubscriptionId() {
|
||||
return subscriptionId;
|
||||
}
|
||||
|
||||
public void setSubscriptionId(final String subscriptionId) {
|
||||
this.subscriptionId = subscriptionId;
|
||||
}
|
||||
|
||||
public String getProducerId() {
|
||||
return producerId;
|
||||
}
|
||||
|
||||
public void setProducerId(final String producerId) {
|
||||
this.producerId = producerId;
|
||||
}
|
||||
|
||||
public String getEventId() {
|
||||
return eventId;
|
||||
}
|
||||
|
||||
public void setEventId(final String eventId) {
|
||||
this.eventId = eventId;
|
||||
}
|
||||
|
||||
public String getTopic() {
|
||||
return topic;
|
||||
}
|
||||
|
||||
public void setTopic(final String topic) {
|
||||
this.topic = topic;
|
||||
}
|
||||
|
||||
public String getPayload() {
|
||||
return payload;
|
||||
}
|
||||
|
||||
public void setPayload(final String payload) {
|
||||
this.payload = payload;
|
||||
}
|
||||
|
||||
public MappedFields getMap() {
|
||||
return map;
|
||||
}
|
||||
|
||||
public void setMap(final MappedFields map) {
|
||||
this.map = map;
|
||||
}
|
||||
|
||||
public Long getDate() {
|
||||
return date;
|
||||
}
|
||||
|
||||
public void setDate(final Long date) {
|
||||
this.date = date;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class Subscription implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = 1051702214740830010L;
|
||||
|
||||
private String subscriptionId;
|
||||
|
||||
private String subscriber;
|
||||
|
||||
private String topic;
|
||||
|
||||
private String conditions;
|
||||
|
||||
public String getSubscriptionId() {
|
||||
return subscriptionId;
|
||||
}
|
||||
|
||||
public void setSubscriptionId(final String subscriptionId) {
|
||||
this.subscriptionId = subscriptionId;
|
||||
}
|
||||
|
||||
public String getSubscriber() {
|
||||
return subscriber;
|
||||
}
|
||||
|
||||
public void setSubscriber(final String subscriber) {
|
||||
this.subscriber = subscriber;
|
||||
}
|
||||
|
||||
public String getTopic() {
|
||||
return topic;
|
||||
}
|
||||
|
||||
public void setTopic(final String topic) {
|
||||
this.topic = topic;
|
||||
}
|
||||
|
||||
public String getConditions() {
|
||||
return conditions;
|
||||
}
|
||||
|
||||
public void setConditions(final String conditions) {
|
||||
this.conditions = conditions;
|
||||
}
|
||||
|
||||
public Map<String, List<ConditionParams>> conditionsAsMap() {
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
try {
|
||||
final List<MapCondition> list = mapper
|
||||
.readValue(
|
||||
getConditions(), mapper.getTypeFactory().constructCollectionType(List.class, MapCondition.class));
|
||||
return list
|
||||
.stream()
|
||||
.filter(mc -> !mc.getListParams().isEmpty())
|
||||
.collect(Collectors.toMap(MapCondition::getField, MapCondition::getListParams));
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -28,7 +28,7 @@ public class GenerateStatsJob {
|
|||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
IndexOnESJob.class
|
||||
GenerateStatsJob.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
|
|
|
@ -0,0 +1,126 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpDelete;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.apache.spark.sql.TypedColumn;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.broker.model.Event;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.EventGroup;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.subset.EventSubsetAggregator;
|
||||
|
||||
public class IndexEventSubsetJob {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(IndexEventSubsetJob.class);
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
IndexEventSubsetJob.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_event_subset.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
final SparkConf conf = new SparkConf();
|
||||
|
||||
final String eventsPath = parser.get("workingPath") + "/events";
|
||||
log.info("eventsPath: {}", eventsPath);
|
||||
|
||||
final String index = parser.get("index");
|
||||
log.info("index: {}", index);
|
||||
|
||||
final String indexHost = parser.get("esHost");
|
||||
log.info("indexHost: {}", indexHost);
|
||||
|
||||
final int maxEventsForTopic = NumberUtils.toInt(parser.get("maxEventsForTopic"));
|
||||
log.info("maxEventsForTopic: {}", maxEventsForTopic);
|
||||
|
||||
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
|
||||
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
|
||||
|
||||
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
|
||||
|
||||
final TypedColumn<Event, EventGroup> aggr = new EventSubsetAggregator(maxEventsForTopic).toColumn();
|
||||
|
||||
final LongAccumulator total = spark.sparkContext().longAccumulator("total_indexed");
|
||||
|
||||
final long now = new Date().getTime();
|
||||
|
||||
final Dataset<Event> subset = ClusterUtils
|
||||
.readPath(spark, eventsPath, Event.class)
|
||||
.groupByKey(e -> e.getTopic() + '@' + e.getMap().getTargetDatasourceId(), Encoders.STRING())
|
||||
.agg(aggr)
|
||||
.map(t -> t._2, Encoders.bean(EventGroup.class))
|
||||
.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
|
||||
|
||||
final JavaRDD<String> inputRdd = subset
|
||||
.map(e -> prepareEventForIndexing(e, now, total), Encoders.STRING())
|
||||
.javaRDD();
|
||||
|
||||
final Map<String, String> esCfg = new HashMap<>();
|
||||
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
|
||||
|
||||
esCfg.put("es.index.auto.create", "false");
|
||||
esCfg.put("es.nodes", indexHost);
|
||||
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
|
||||
esCfg.put("es.batch.write.retry.count", "8");
|
||||
esCfg.put("es.batch.write.retry.wait", "60s");
|
||||
esCfg.put("es.batch.size.entries", "200");
|
||||
esCfg.put("es.nodes.wan.only", "true");
|
||||
|
||||
log.info("*** Start indexing");
|
||||
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
||||
log.info("*** End indexing");
|
||||
|
||||
log.info("*** Deleting old events");
|
||||
final String message = deleteOldEvents(brokerApiBaseUrl, now - 1000);
|
||||
log.info("*** Deleted events: " + message);
|
||||
|
||||
}
|
||||
|
||||
private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws Exception {
|
||||
final String url = brokerApiBaseUrl + "/api/events/byCreationDate/0/" + l;
|
||||
final HttpDelete req = new HttpDelete(url);
|
||||
|
||||
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||
return IOUtils.toString(response.getEntity().getContent());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static String prepareEventForIndexing(final Event e, final long creationDate, final LongAccumulator acc)
|
||||
throws JsonProcessingException {
|
||||
acc.add(1);
|
||||
|
||||
e.setCreationDate(creationDate);
|
||||
e.setExpiryDate(Long.MAX_VALUE);
|
||||
|
||||
return new ObjectMapper().writeValueAsString(e);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,233 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpDelete;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.broker.model.ConditionParams;
|
||||
import eu.dnetlib.dhp.broker.model.Event;
|
||||
import eu.dnetlib.dhp.broker.model.MappedFields;
|
||||
import eu.dnetlib.dhp.broker.model.Notification;
|
||||
import eu.dnetlib.dhp.broker.model.Subscription;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.NotificationGroup;
|
||||
import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils;
|
||||
|
||||
public class IndexNotificationsJob {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(IndexNotificationsJob.class);
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(IndexNotificationsJob.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
final SparkConf conf = new SparkConf();
|
||||
|
||||
final String eventsPath = parser.get("workingPath") + "/events";
|
||||
log.info("eventsPath: {}", eventsPath);
|
||||
|
||||
final String index = parser.get("index");
|
||||
log.info("index: {}", index);
|
||||
|
||||
final String indexHost = parser.get("esHost");
|
||||
log.info("indexHost: {}", indexHost);
|
||||
|
||||
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
|
||||
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
|
||||
|
||||
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
|
||||
|
||||
final LongAccumulator total = spark.sparkContext().longAccumulator("total_indexed");
|
||||
|
||||
final long startTime = new Date().getTime();
|
||||
|
||||
final List<Subscription> subscriptions = listSubscriptions(brokerApiBaseUrl);
|
||||
|
||||
log.info("Number of subscriptions: " + subscriptions.size());
|
||||
|
||||
if (subscriptions.size() > 0) {
|
||||
final Dataset<Notification> notifications = ClusterUtils
|
||||
.readPath(spark, eventsPath, Event.class)
|
||||
.map(e -> generateNotifications(e, subscriptions, startTime), Encoders.bean(NotificationGroup.class))
|
||||
.flatMap(g -> g.getData().iterator(), Encoders.bean(Notification.class));
|
||||
|
||||
final JavaRDD<String> inputRdd = notifications
|
||||
.map(n -> prepareForIndexing(n, total), Encoders.STRING())
|
||||
.javaRDD();
|
||||
|
||||
final Map<String, String> esCfg = new HashMap<>();
|
||||
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
|
||||
|
||||
esCfg.put("es.index.auto.create", "false");
|
||||
esCfg.put("es.nodes", indexHost);
|
||||
esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
|
||||
esCfg.put("es.batch.write.retry.count", "8");
|
||||
esCfg.put("es.batch.write.retry.wait", "60s");
|
||||
esCfg.put("es.batch.size.entries", "200");
|
||||
esCfg.put("es.nodes.wan.only", "true");
|
||||
|
||||
log.info("*** Start indexing");
|
||||
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
||||
log.info("*** End indexing");
|
||||
|
||||
log.info("*** Deleting old notifications");
|
||||
final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000);
|
||||
log.info("*** Deleted notifications: " + message);
|
||||
|
||||
log.info("*** sendNotifications (emails, ...)");
|
||||
sendNotifications(brokerApiBaseUrl, startTime - 1000);
|
||||
log.info("*** ALL done.");
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private static NotificationGroup generateNotifications(final Event e,
|
||||
final List<Subscription> subscriptions,
|
||||
final long date) {
|
||||
final List<Notification> list = subscriptions
|
||||
.stream()
|
||||
.filter(s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic()))
|
||||
.filter(s -> verifyConditions(e.getMap(), s.conditionsAsMap()))
|
||||
.map(s -> generateNotification(s, e, date))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
return new NotificationGroup(list);
|
||||
}
|
||||
|
||||
private static Notification generateNotification(final Subscription s, final Event e, final long date) {
|
||||
final Notification n = new Notification();
|
||||
n.setNotificationId("ntf-" + DigestUtils.md5Hex(s.getSubscriptionId() + "@@@" + e.getEventId()));
|
||||
n.setSubscriptionId(s.getSubscriptionId());
|
||||
n.setEventId(e.getEventId());
|
||||
n.setProducerId(e.getProducerId());
|
||||
n.setTopic(e.getTopic());
|
||||
n.setPayload(e.getPayload());
|
||||
n.setMap(e.getMap());
|
||||
n.setDate(date);
|
||||
return n;
|
||||
}
|
||||
|
||||
private static boolean verifyConditions(final MappedFields map,
|
||||
final Map<String, List<ConditionParams>> conditions) {
|
||||
if (conditions.containsKey("targetDatasourceName")
|
||||
&& !SubscriptionUtils
|
||||
.verifyExact(map.getTargetDatasourceName(), conditions.get("targetDatasourceName").get(0).getValue())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (conditions.containsKey("trust")
|
||||
&& !SubscriptionUtils
|
||||
.verifyFloatRange(map.getTrust(), conditions.get("trust").get(0).getValue(), conditions.get("trust").get(0).getOtherValue())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (conditions.containsKey("targetDateofacceptance") && !conditions
|
||||
.get("targetDateofacceptance")
|
||||
.stream()
|
||||
.anyMatch(c -> SubscriptionUtils
|
||||
.verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (conditions.containsKey("targetResultTitle")
|
||||
&& !conditions
|
||||
.get("targetResultTitle")
|
||||
.stream()
|
||||
.anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (conditions.containsKey("targetAuthors")
|
||||
&& !conditions
|
||||
.get("targetAuthors")
|
||||
.stream()
|
||||
.allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (conditions.containsKey("targetSubjects")
|
||||
&& !conditions
|
||||
.get("targetSubjects")
|
||||
.stream()
|
||||
.allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue()))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
private static List<Subscription> listSubscriptions(final String brokerApiBaseUrl) throws Exception {
|
||||
final String url = brokerApiBaseUrl + "/api/subscriptions";
|
||||
final HttpGet req = new HttpGet(url);
|
||||
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||
final String s = IOUtils.toString(response.getEntity().getContent());
|
||||
return mapper
|
||||
.readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, Subscription.class));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception {
|
||||
final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l;
|
||||
final HttpDelete req = new HttpDelete(url);
|
||||
|
||||
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||
return IOUtils.toString(response.getEntity().getContent());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static String sendNotifications(final String brokerApiBaseUrl, final long l) throws IOException {
|
||||
final String url = brokerApiBaseUrl + "/api/openaireBroker/notifications/send/" + l;
|
||||
final HttpGet req = new HttpGet(url);
|
||||
|
||||
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||
return IOUtils.toString(response.getEntity().getContent());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static String prepareForIndexing(final Notification n, final LongAccumulator acc)
|
||||
throws JsonProcessingException {
|
||||
acc.add(1);
|
||||
return new ObjectMapper().writeValueAsString(n);
|
||||
}
|
||||
|
||||
}
|
|
@ -20,6 +20,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|||
import eu.dnetlib.dhp.broker.model.Event;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
|
||||
|
||||
@Deprecated
|
||||
public class IndexOnESJob {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(IndexOnESJob.class);
|
||||
|
@ -48,12 +49,13 @@ public class IndexOnESJob {
|
|||
|
||||
final JavaRDD<String> inputRdd = ClusterUtils
|
||||
.readPath(spark, eventsPath, Event.class)
|
||||
// .limit(10000) // TODO REMOVE
|
||||
.map(IndexOnESJob::eventAsJsonString, Encoders.STRING())
|
||||
.javaRDD();
|
||||
|
||||
final Map<String, String> esCfg = new HashMap<>();
|
||||
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
|
||||
|
||||
esCfg.put("es.index.auto.create", "false");
|
||||
esCfg.put("es.nodes", indexHost);
|
||||
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
|
||||
esCfg.put("es.batch.write.retry.count", "8");
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Notification;
|
||||
|
||||
public class NotificationGroup implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = 720996471281158977L;
|
||||
|
||||
private List<Notification> data = new ArrayList<>();
|
||||
|
||||
public NotificationGroup() {
|
||||
}
|
||||
|
||||
public NotificationGroup(final List<Notification> data) {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public List<Notification> getData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
public void setData(final List<Notification> data) {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public NotificationGroup addElement(final Notification elem) {
|
||||
data.add(elem);
|
||||
return this;
|
||||
}
|
||||
|
||||
public NotificationGroup addGroup(final NotificationGroup group) {
|
||||
data.addAll(group.getData());
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
import org.apache.commons.lang3.time.DateUtils;
|
||||
|
||||
public class SubscriptionUtils {
|
||||
|
||||
private static final long ONE_DAY = 86_400_000;
|
||||
|
||||
public static boolean verifyListSimilar(final List<String> list, final String value) {
|
||||
return list.stream().anyMatch(s -> verifySimilar(s, value));
|
||||
}
|
||||
|
||||
public static boolean verifyListExact(final List<String> list, final String value) {
|
||||
return list.stream().anyMatch(s -> verifyExact(s, value));
|
||||
}
|
||||
|
||||
public static boolean verifySimilar(final String s1, final String s2) {
|
||||
for (final String part : s2.split("\\W+")) {
|
||||
if (!StringUtils.containsIgnoreCase(s1, part)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public static boolean verifyFloatRange(final float trust, final String min, final String max) {
|
||||
return trust >= NumberUtils.toFloat(min, 0) && trust <= NumberUtils.toFloat(max, 1);
|
||||
}
|
||||
|
||||
public static boolean verifyDateRange(final long date, final String min, final String max) {
|
||||
try {
|
||||
return date >= DateUtils.parseDate(min, "yyyy-MM-dd").getTime()
|
||||
&& date < DateUtils.parseDate(max, "yyyy-MM-dd").getTime() + ONE_DAY;
|
||||
} catch (final ParseException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean verifyExact(final String s1, final String s2) {
|
||||
return StringUtils.equalsIgnoreCase(s1, s2);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.subset;
|
||||
|
||||
import org.apache.spark.sql.Encoder;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.expressions.Aggregator;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Event;
|
||||
import eu.dnetlib.dhp.broker.oa.util.EventGroup;
|
||||
|
||||
public class EventSubsetAggregator extends Aggregator<Event, EventGroup, EventGroup> {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = -678071078823059805L;
|
||||
|
||||
private final int maxEventsForTopic;
|
||||
|
||||
public EventSubsetAggregator(final int maxEventsForTopic) {
|
||||
this.maxEventsForTopic = maxEventsForTopic;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EventGroup zero() {
|
||||
return new EventGroup();
|
||||
}
|
||||
|
||||
@Override
|
||||
public EventGroup reduce(final EventGroup g, final Event e) {
|
||||
if (g.getData().size() < maxEventsForTopic) {
|
||||
g.getData().add(e);
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EventGroup merge(final EventGroup g0, final EventGroup g1) {
|
||||
final int missing = maxEventsForTopic - g0.getData().size();
|
||||
|
||||
if (missing > 0) {
|
||||
if (g1.getData().size() < missing) {
|
||||
g0.getData().addAll(g1.getData());
|
||||
} else {
|
||||
g0.getData().addAll(g1.getData().subList(0, missing));
|
||||
}
|
||||
}
|
||||
|
||||
return g0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EventGroup finish(final EventGroup g) {
|
||||
return g;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Encoder<EventGroup> outputEncoder() {
|
||||
return Encoders.bean(EventGroup.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Encoder<EventGroup> bufferEncoder() {
|
||||
return Encoders.bean(EventGroup.class);
|
||||
}
|
||||
|
||||
}
|
|
@ -25,13 +25,25 @@
|
|||
<description>a black list (comma separated, - for empty list) of datasource ids</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>esIndexName</name>
|
||||
<description>the elasticsearch index name</description>
|
||||
<name>esEventIndexName</name>
|
||||
<description>the elasticsearch index name for events</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>esNotificationsIndexName</name>
|
||||
<description>the elasticsearch index name for notifications</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>esIndexHost</name>
|
||||
<description>the elasticsearch host</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>maxIndexedEventsForDsAndTopic</name>
|
||||
<description>the max number of events for each couple (ds/topic)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>brokerApiBaseUrl</name>
|
||||
<description>the url of the broker service api</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
|
@ -423,16 +435,16 @@
|
|||
<arg>--datasourceTypeWhitelist</arg><arg>${datasourceTypeWhitelist}</arg>
|
||||
<arg>--datasourceIdBlacklist</arg><arg>${datasourceIdBlacklist}</arg>
|
||||
</spark>
|
||||
<ok to="index_es"/>
|
||||
<ok to="index_event_subset"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="index_es">
|
||||
<action name="index_event_subset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>IndexOnESJob</name>
|
||||
<class>eu.dnetlib.dhp.broker.oa.IndexOnESJob</class>
|
||||
<name>IndexEventSubsetOnESJob</name>
|
||||
<class>eu.dnetlib.dhp.broker.oa.IndexEventSubsetJob</class>
|
||||
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
|
@ -445,8 +457,36 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--index</arg><arg>${esIndexName}</arg>
|
||||
<arg>--index</arg><arg>${esEventIndexName}</arg>
|
||||
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||
<arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
|
||||
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||
</spark>
|
||||
<ok to="index_notifications"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="index_notifications">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>IndexNotificationsOnESJob</name>
|
||||
<class>eu.dnetlib.dhp.broker.oa.IndexNotificationsJob</class>
|
||||
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.dynamicAllocation.maxExecutors="8"
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
|
||||
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||
</spark>
|
||||
<ok to="stats"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
[
|
||||
{
|
||||
"paramName": "o",
|
||||
"paramLongName": "workingPath",
|
||||
"paramDescription": "the working path",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "idx",
|
||||
"paramLongName": "index",
|
||||
"paramDescription": "the ES index",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "es",
|
||||
"paramLongName": "esHost",
|
||||
"paramDescription": "the ES host",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "n",
|
||||
"paramLongName": "maxEventsForTopic",
|
||||
"paramDescription": "the max number of events for each couple (ds/topic)",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "broker",
|
||||
"paramLongName": "brokerApiBaseUrl",
|
||||
"paramDescription": "the url of the broker service api",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -0,0 +1,26 @@
|
|||
[
|
||||
{
|
||||
"paramName": "o",
|
||||
"paramLongName": "workingPath",
|
||||
"paramDescription": "the working path",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "idx",
|
||||
"paramLongName": "index",
|
||||
"paramDescription": "the ES index",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "es",
|
||||
"paramLongName": "esHost",
|
||||
"paramDescription": "the ES host",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "broker",
|
||||
"paramLongName": "brokerApiBaseUrl",
|
||||
"paramDescription": "the url of the broker service api",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -8,6 +8,41 @@
|
|||
<property>
|
||||
<name>workingPath</name>
|
||||
<description>the path where the generated data will be stored</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>datasourceIdWhitelist</name>
|
||||
<value>-</value>
|
||||
<description>a white list (comma separated, - for empty list) of datasource ids</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>datasourceTypeWhitelist</name>
|
||||
<value>-</value>
|
||||
<description>a white list (comma separated, - for empty list) of datasource types</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>datasourceIdBlacklist</name>
|
||||
<value>-</value>
|
||||
<description>a black list (comma separated, - for empty list) of datasource ids</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>esEventIndexName</name>
|
||||
<description>the elasticsearch index name for events</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>esNotificationsIndexName</name>
|
||||
<description>the elasticsearch index name for notifications</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>esIndexHost</name>
|
||||
<description>the elasticsearch host</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>maxIndexedEventsForDsAndTopic</name>
|
||||
<description>the max number of events for each couple (ds/topic)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>brokerApiBaseUrl</name>
|
||||
<description>the url of the broker service api</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
|
@ -64,44 +99,18 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="generate_events"/>
|
||||
<start to="index_notifications"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="generate_events">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateEventsJob</name>
|
||||
<class>eu.dnetlib.dhp.broker.oa.GenerateEventsJob</class>
|
||||
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--datasourceIdWhitelist</arg><arg>${datasourceIdWhitelist}</arg>
|
||||
<arg>--datasourceTypeWhitelist</arg><arg>${datasourceTypeWhitelist}</arg>
|
||||
<arg>--datasourceIdBlacklist</arg><arg>${datasourceIdBlacklist}</arg>
|
||||
</spark>
|
||||
<ok to="index_es"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="index_es">
|
||||
<action name="index_notifications">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>IndexOnESJob</name>
|
||||
<class>eu.dnetlib.dhp.broker.oa.IndexOnESJob</class>
|
||||
<name>IndexNotificationsOnESJob</name>
|
||||
<class>eu.dnetlib.dhp.broker.oa.IndexNotificationsJob</class>
|
||||
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
|
@ -114,37 +123,15 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--index</arg><arg>${esIndexName}</arg>
|
||||
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
|
||||
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||
</spark>
|
||||
<ok to="stats"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="stats">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateStatsJob</name>
|
||||
<class>eu.dnetlib.dhp.broker.oa.GenerateStatsJob</class>
|
||||
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -0,0 +1,52 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class SubscriptionUtilsTest {
|
||||
|
||||
@Test
|
||||
void testVerifyListSimilar() {
|
||||
assertTrue(SubscriptionUtils.verifyListSimilar(Arrays.asList("Michele Artini", "Claudio Atzori"), "artini"));
|
||||
assertFalse(SubscriptionUtils.verifyListSimilar(Arrays.asList("Michele Artini", "Claudio Atzori"), "bardi"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testVerifyListExact() {
|
||||
assertTrue(SubscriptionUtils.verifyListExact(Arrays.asList("Java", "Perl"), "perl"));
|
||||
assertFalse(SubscriptionUtils.verifyListExact(Arrays.asList("Java", "Perl"), "C"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testVerifySimilar() {
|
||||
assertTrue(SubscriptionUtils.verifySimilar("Java Programming", "java"));
|
||||
assertFalse(SubscriptionUtils.verifySimilar("Java Programming", "soap"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testVerifyFloatRange() {
|
||||
assertTrue(SubscriptionUtils.verifyFloatRange(0.5f, "0.4", "0.6"));
|
||||
assertFalse(SubscriptionUtils.verifyFloatRange(0.8f, "0.4", "0.6"));
|
||||
assertTrue(SubscriptionUtils.verifyFloatRange(0.5f, "", ""));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testVerifyDateRange() {
|
||||
final long date = 1282738478000l; // 25 August 2010
|
||||
|
||||
assertTrue(SubscriptionUtils.verifyDateRange(date, "2010-01-01", "2011-01-01"));
|
||||
assertFalse(SubscriptionUtils.verifyDateRange(date, "2020-01-01", "2021-01-01"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testVerifyExact() {
|
||||
assertTrue(SubscriptionUtils.verifyExact("Java Programming", "java programming"));
|
||||
assertFalse(SubscriptionUtils.verifyExact("Java Programming", "soap programming"));
|
||||
}
|
||||
|
||||
}
|
|
@ -289,4 +289,12 @@ public class JsonPathTest {
|
|||
|
||||
System.out.println("d = " + d);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNull() throws Exception {
|
||||
final Object p = null;
|
||||
|
||||
System.out.println((String) p);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ import java.util.Collection;
|
|||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.PairFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
@ -15,6 +16,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import com.google.common.collect.Lists;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.scholexplorer.DLIDataset;
|
||||
import eu.dnetlib.dhp.schema.scholexplorer.DLIPublication;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||
import scala.Tuple2;
|
||||
|
@ -30,10 +33,16 @@ public class DedupRecordFactory {
|
|||
final DedupConfig dedupConf) {
|
||||
long ts = System.currentTimeMillis();
|
||||
// <id, json_entity>
|
||||
final JavaPairRDD<String, String> inputJsonEntities = sc
|
||||
.textFile(entitiesInputPath)
|
||||
final JavaPairRDD<String, String> inputJsonEntities = spark
|
||||
.read()
|
||||
.load(entitiesInputPath)
|
||||
.as(Encoders.kryo(Oaf.class))
|
||||
.map(
|
||||
(MapFunction<Oaf, String>) p -> new org.codehaus.jackson.map.ObjectMapper().writeValueAsString(p),
|
||||
Encoders.STRING())
|
||||
.javaRDD()
|
||||
.mapToPair(
|
||||
(PairFunction<String, String, String>) it -> new Tuple2<String, String>(
|
||||
(PairFunction<String, String, String>) it -> new Tuple2<>(
|
||||
MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), it), it));
|
||||
|
||||
// <source, target>: source is the dedup_id, target is the id of the mergedIn
|
||||
|
@ -74,9 +83,9 @@ public class DedupRecordFactory {
|
|||
}
|
||||
}
|
||||
|
||||
private static Publication publicationMerger(Tuple2<String, Iterable<String>> e, final long ts) {
|
||||
private static DLIPublication publicationMerger(Tuple2<String, Iterable<String>> e, final long ts) {
|
||||
|
||||
Publication p = new Publication(); // the result of the merge, to be returned at the end
|
||||
DLIPublication p = new DLIPublication(); // the result of the merge, to be returned at the end
|
||||
|
||||
p.setId(e._1());
|
||||
|
||||
|
@ -91,7 +100,7 @@ public class DedupRecordFactory {
|
|||
.forEach(
|
||||
pub -> {
|
||||
try {
|
||||
Publication publication = mapper.readValue(pub, Publication.class);
|
||||
DLIPublication publication = mapper.readValue(pub, DLIPublication.class);
|
||||
|
||||
p.mergeFrom(publication);
|
||||
p.setAuthor(DedupUtility.mergeAuthor(p.getAuthor(), publication.getAuthor()));
|
||||
|
@ -110,9 +119,9 @@ public class DedupRecordFactory {
|
|||
return p;
|
||||
}
|
||||
|
||||
private static Dataset datasetMerger(Tuple2<String, Iterable<String>> e, final long ts) {
|
||||
private static DLIDataset datasetMerger(Tuple2<String, Iterable<String>> e, final long ts) {
|
||||
|
||||
Dataset d = new Dataset(); // the result of the merge, to be returned at the end
|
||||
DLIDataset d = new DLIDataset(); // the result of the merge, to be returned at the end
|
||||
|
||||
d.setId(e._1());
|
||||
|
||||
|
|
|
@ -9,18 +9,21 @@ import org.apache.spark.api.java.JavaPairRDD;
|
|||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.PairFunction;
|
||||
import org.apache.spark.graphx.Edge;
|
||||
import org.apache.spark.rdd.RDD;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
import com.google.common.hash.Hashing;
|
||||
|
||||
import eu.dnetlib.dedup.graph.ConnectedComponent;
|
||||
import eu.dnetlib.dedup.graph.GraphProcessor;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||
|
@ -42,7 +45,6 @@ public class SparkCreateConnectedComponent {
|
|||
.master(parser.get("master"))
|
||||
.getOrCreate();
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
final String entity = parser.get("entity");
|
||||
final String targetPath = parser.get("targetPath");
|
||||
|
@ -50,8 +52,12 @@ public class SparkCreateConnectedComponent {
|
|||
// DedupConfig.load(IOUtils.toString(SparkCreateConnectedComponent.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/conf/org.curr.conf2.json")));
|
||||
final DedupConfig dedupConf = DedupConfig.load(parser.get("dedupConf"));
|
||||
|
||||
final JavaPairRDD<Object, String> vertexes = sc
|
||||
.textFile(inputPath + "/" + entity)
|
||||
final JavaPairRDD<Object, String> vertexes = spark
|
||||
.read()
|
||||
.load(inputPath + "/" + entity)
|
||||
.as(Encoders.kryo(Oaf.class))
|
||||
.map((MapFunction<Oaf, String>) p -> new ObjectMapper().writeValueAsString(p), Encoders.STRING())
|
||||
.javaRDD()
|
||||
.map(s -> MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), s))
|
||||
.mapToPair(
|
||||
(PairFunction<String, Object, String>) s -> new Tuple2<Object, String>(getHashcode(s), s));
|
||||
|
|
|
@ -4,10 +4,10 @@ package eu.dnetlib.dedup;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
|
@ -41,12 +41,19 @@ public class SparkCreateDedupRecord {
|
|||
DedupUtility.createEntityPath(sourcePath, entity),
|
||||
OafEntityType.valueOf(entity),
|
||||
dedupConf);
|
||||
dedupRecord
|
||||
.map(
|
||||
r -> {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
return mapper.writeValueAsString(r);
|
||||
})
|
||||
.saveAsTextFile(dedupPath + "/" + entity + "/dedup_records");
|
||||
spark
|
||||
.createDataset(dedupRecord.rdd(), Encoders.kryo(OafEntity.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(dedupPath + "/" + entity + "/dedup_records");
|
||||
//
|
||||
//
|
||||
// dedupRecord
|
||||
// .map(
|
||||
// r -> {
|
||||
// ObjectMapper mapper = new ObjectMapper();
|
||||
// return mapper.writeValueAsString(r);
|
||||
// })
|
||||
// .saveAsTextFile(dedupPath + "/" + entity + "/dedup_records");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,10 +7,13 @@ import org.apache.commons.io.IOUtils;
|
|||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
|
@ -46,8 +49,13 @@ public class SparkCreateSimRels {
|
|||
// DedupConfig.load(IOUtils.toString(SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json")));
|
||||
final DedupConfig dedupConf = DedupConfig.load(parser.get("dedupConf"));
|
||||
|
||||
JavaPairRDD<String, MapDocument> mapDocument = sc
|
||||
.textFile(inputPath + "/" + entity)
|
||||
JavaPairRDD<String, MapDocument> mapDocument = spark
|
||||
.read()
|
||||
.load(inputPath + "/" + entity)
|
||||
.as(Encoders.kryo(Oaf.class))
|
||||
.map((MapFunction<Oaf, String>) p -> new ObjectMapper().writeValueAsString(p), Encoders.STRING())
|
||||
.javaRDD()
|
||||
.repartition(1000)
|
||||
.mapToPair(
|
||||
s -> {
|
||||
MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s);
|
||||
|
|
|
@ -1,29 +1,19 @@
|
|||
|
||||
package eu.dnetlib.dedup.sx;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class SparkPropagateRelationsJob {
|
||||
enum FieldType {
|
||||
SOURCE, TARGET
|
||||
}
|
||||
|
||||
static final String SOURCEJSONPATH = "$.source";
|
||||
static final String TARGETJSONPATH = "$.target";
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
|
@ -39,7 +29,6 @@ public class SparkPropagateRelationsJob {
|
|||
.master(parser.get("master"))
|
||||
.getOrCreate();
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
final String relationPath = parser.get("relationPath");
|
||||
final String mergeRelPath = parser.get("mergeRelPath");
|
||||
final String targetRelPath = parser.get("targetRelPath");
|
||||
|
@ -50,7 +39,13 @@ public class SparkPropagateRelationsJob {
|
|||
.as(Encoders.bean(Relation.class))
|
||||
.where("relClass == 'merges'");
|
||||
|
||||
final Dataset<Relation> rels = spark.read().load(relationPath).as(Encoders.bean(Relation.class));
|
||||
final Dataset<Relation> rels = spark
|
||||
.read()
|
||||
.load(relationPath)
|
||||
.as(Encoders.kryo(Relation.class))
|
||||
.map(
|
||||
(MapFunction<Relation, Relation>) r -> r,
|
||||
Encoders.bean(Relation.class));
|
||||
|
||||
final Dataset<Relation> firstJoin = rels
|
||||
.joinWith(merge, merge.col("target").equalTo(rels.col("source")), "left_outer")
|
||||
|
@ -58,9 +53,10 @@ public class SparkPropagateRelationsJob {
|
|||
(MapFunction<Tuple2<Relation, Relation>, Relation>) r -> {
|
||||
final Relation mergeRelation = r._2();
|
||||
final Relation relation = r._1();
|
||||
|
||||
if (mergeRelation != null)
|
||||
relation.setSource(mergeRelation.getSource());
|
||||
if (relation.getDataInfo() == null)
|
||||
relation.setDataInfo(OafUtils.generateDataInfo("0.9", false));
|
||||
return relation;
|
||||
},
|
||||
Encoders.bean(Relation.class));
|
||||
|
@ -75,38 +71,8 @@ public class SparkPropagateRelationsJob {
|
|||
relation.setTarget(mergeRelation.getSource());
|
||||
return relation;
|
||||
},
|
||||
Encoders.bean(Relation.class));
|
||||
Encoders.kryo(Relation.class));
|
||||
|
||||
secondJoin.write().mode(SaveMode.Overwrite).save(targetRelPath);
|
||||
}
|
||||
|
||||
private static boolean containsDedup(final String json) {
|
||||
final String source = DHPUtils.getJPathString(SOURCEJSONPATH, json);
|
||||
final String target = DHPUtils.getJPathString(TARGETJSONPATH, json);
|
||||
|
||||
return source.toLowerCase().contains("dedup") || target.toLowerCase().contains("dedup");
|
||||
}
|
||||
|
||||
private static String replaceField(final String json, final String id, final FieldType type) {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
try {
|
||||
Relation relation = mapper.readValue(json, Relation.class);
|
||||
if (relation.getDataInfo() == null)
|
||||
relation.setDataInfo(new DataInfo());
|
||||
relation.getDataInfo().setDeletedbyinference(false);
|
||||
switch (type) {
|
||||
case SOURCE:
|
||||
relation.setSource(id);
|
||||
return mapper.writeValueAsString(relation);
|
||||
case TARGET:
|
||||
relation.setTarget(id);
|
||||
return mapper.writeValueAsString(relation);
|
||||
default:
|
||||
throw new IllegalArgumentException("");
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("unable to deserialize json relation: " + json, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
package eu.dnetlib.dedup.sx
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, OafEntity, Relation}
|
||||
import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIUnknown, OafUtils}
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.LoggerFactory
|
||||
import org.apache.spark.sql.functions.col
|
||||
|
||||
/**
  * Spark job that marks the entities merged by the deduplication as deleted by inference and
  * writes them out together with the dedup records.
  *
  * Steps performed by `main`:
  *  - reads the relations stored under `mergeRelPath` and keeps the `target` column of those
  *    whose `relClass` is 'merges';
  *  - reads the entities stored under `entityPath` and left-joins them, by id, with those targets;
  *  - sets `deletedbyinference = true` on every entity that found a match (creating the DataInfo
  *    through `OafUtils.generateDataInfo()` when the entity has none);
  *  - appends the records read from `dedupRecordPath` and overwrites `targetPath` with the result,
  *    repartitioned into 1200 partitions.
  */
object SparkUpdateEntityWithDedupInfo {

  def main(args: Array[String]): Unit = {
    val parameterSpec = IOUtils.toString(
      SparkUpdateEntityWithDedupInfo.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/dedup/dedup_delete_by_inference_parameters.json"))
    val parser = new ArgumentApplicationParser(parameterSpec)
    val logger = LoggerFactory.getLogger(SparkUpdateEntityWithDedupInfo.getClass)
    parser.parseArgument(args)

    val workingPath: String = parser.get("workingPath")
    logger.info(s"Working dir path = $workingPath")

    // Entities are serialized with kryo, relations as beans (their columns are queried below).
    implicit val oafEncoder: Encoder[OafEntity] = Encoders.kryo[OafEntity]
    implicit val relEncoder: Encoder[Relation] = Encoders.bean(classOf[Relation])

    implicit val pubEncoder: Encoder[DLIPublication] = Encoders.kryo[DLIPublication]
    implicit val datEncoder: Encoder[DLIDataset] = Encoders.kryo[DLIDataset]
    implicit val unkEncoder: Encoder[DLIUnknown] = Encoders.kryo[DLIUnknown]

    val sessionBuilder = SparkSession
      .builder()
      .appName(SparkUpdateEntityWithDedupInfo.getClass.getSimpleName)
      .master(parser.get("master"))
    val spark: SparkSession = sessionBuilder.getOrCreate()

    val entityPath = parser.get("entityPath")
    val mergeRelPath = parser.get("mergeRelPath")
    val dedupRecordPath = parser.get("dedupRecordPath")
    val entity = parser.get("entity")
    val destination = parser.get("targetPath")

    // Ids of the entities that have been merged into a dedup record.
    val mergeRelations = spark.read.load(mergeRelPath).as[Relation]
    val mergedIds = mergeRelations
      .where("relClass == 'merges'")
      .select(col("target"))

    // Pair every entity with its id, so that the id is available as a joinable column ("_1").
    val idAndEntityEncoder = Encoders.tuple(Encoders.STRING, oafEncoder)
    val entities: Dataset[(String, OafEntity)] = spark
      .read
      .load(entityPath)
      .as[OafEntity]
      .map(o => (o.getId, o))(idAndEntityEncoder)

    val joinedWithMerged = entities.joinWith(mergedIds, entities("_1") === mergedIds("target"), "left")

    val finalDataset: Dataset[OafEntity] = joinedWithMerged.map(pair => {
      val current: OafEntity = pair._1._2
      val mergedRow = pair._2
      // The right side is null when the left join found no 'merges' relation for this entity.
      val merged = mergedRow != null && mergedRow.getString(0).nonEmpty
      if (merged) {
        if (current.getDataInfo == null) {
          current.setDataInfo(OafUtils.generateDataInfo())
        }
        current.getDataInfo.setDeletedbyinference(true)
      }
      current
    })

    val dedupRecords: Dataset[OafEntity] = spark.read.load(dedupRecordPath).as[OafEntity]

    val output = finalDataset.union(dedupRecords).repartition(1200)
    output
      .write
      .mode(SaveMode.Overwrite)
      .save(destination)
  }

}
|
|
@ -53,6 +53,7 @@
|
|||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>-mt</arg><arg>yarn-cluster</arg>
|
||||
|
@ -77,6 +78,7 @@
|
|||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>-mt</arg><arg>yarn-cluster</arg>
|
||||
|
@ -101,6 +103,7 @@
|
|||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>-mt</arg><arg>yarn-cluster</arg>
|
||||
|
@ -125,6 +128,7 @@
|
|||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>-mt</arg><arg>yarn-cluster</arg>
|
||||
|
@ -144,11 +148,12 @@
|
|||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Update ${entity} and add DedupRecord</name>
|
||||
<class>eu.dnetlib.dedup.sx.SparkUpdateEntityJob</class>
|
||||
<class>eu.dnetlib.dedup.sx.SparkUpdateEntityWithDedupInfo</class>
|
||||
<jar>dhp-dedup-scholexplorer-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>-mt</arg><arg>yarn-cluster</arg>
|
||||
|
|
|
@ -109,9 +109,9 @@ public class PropagationConstant {
|
|||
}
|
||||
|
||||
public static String getConstraintList(String text, List<String> constraints) {
|
||||
String ret = " and (" + text + constraints.get(0) + "'";
|
||||
String ret = " and (" + text + constraints.get(0).toLowerCase() + "'";
|
||||
for (int i = 1; i < constraints.size(); i++) {
|
||||
ret += " OR " + text + constraints.get(i) + "'";
|
||||
ret += " OR " + text + constraints.get(i).toLowerCase() + "'";
|
||||
}
|
||||
ret += ")";
|
||||
return ret;
|
||||
|
|
|
@ -110,13 +110,6 @@ public class CommunityConfigurationFactory {
|
|||
}
|
||||
|
||||
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
|
||||
final Node oacommunitynode = node.selectSingleNode("./oacommunity");
|
||||
String oacommunity = null;
|
||||
if (oacommunitynode != null) {
|
||||
String tmp = oacommunitynode.getText();
|
||||
if (StringUtils.isNotBlank(tmp))
|
||||
oacommunity = tmp;
|
||||
}
|
||||
|
||||
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
|
||||
final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>();
|
||||
|
@ -127,11 +120,7 @@ public class CommunityConfigurationFactory {
|
|||
|
||||
zenodoCommunityList.add(zc);
|
||||
}
|
||||
if (oacommunity != null) {
|
||||
ZenodoCommunity zc = new ZenodoCommunity();
|
||||
zc.setZenodoCommunityId(oacommunity);
|
||||
zenodoCommunityList.add(zc);
|
||||
}
|
||||
|
||||
log.info("size of the zenodo community list " + zenodoCommunityList.size());
|
||||
return zenodoCommunityList;
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@ public class Provider implements Serializable {
|
|||
}
|
||||
|
||||
private void setSelCriteria(String json, VerbResolver resolver) {
|
||||
log.info("Selection constraints for datasource = " + json);
|
||||
log.debug("Selection constraints for datasource = " + json);
|
||||
selectionConstraints = new Gson().fromJson(json, SelectionConstraints.class);
|
||||
|
||||
selectionConstraints.setSelection(resolver);
|
||||
|
@ -54,7 +54,7 @@ public class Provider implements Serializable {
|
|||
try {
|
||||
setSelCriteria(n.getText(), resolver);
|
||||
} catch (Exception e) {
|
||||
log.info("not set selection criteria... ");
|
||||
log.debug("not set selection criteria... ");
|
||||
selectionConstraints = null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,7 +17,9 @@ public class QueryInformationSystem {
|
|||
+ " let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept "
|
||||
+ " let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept "
|
||||
+ " let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept "
|
||||
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] "
|
||||
+
|
||||
"let $zenodo := $x//param[./@name='zenodoCommunity']/text() "
|
||||
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden' "
|
||||
+ " return "
|
||||
+ " <community> "
|
||||
+ " { $x//CONFIGURATION/context/@id} "
|
||||
|
@ -38,8 +40,15 @@ public class QueryInformationSystem {
|
|||
+ " {$d/param[./@name='selcriteria']/text()} "
|
||||
+ " </selcriteria> "
|
||||
+ " </datasource> } "
|
||||
+ " </datasources> "
|
||||
+ " <zenodocommunities> "
|
||||
+ " </datasources> " +
|
||||
" <zenodocommunities> " +
|
||||
"{for $zc in $zenodo " +
|
||||
"return " +
|
||||
"<zenodocommunity> " +
|
||||
"<zenodoid> " +
|
||||
"{$zc} " +
|
||||
"</zenodoid> " +
|
||||
"</zenodocommunity>}"
|
||||
+ " {for $zc in $communities "
|
||||
+ " return "
|
||||
+ " <zenodocommunity> "
|
||||
|
|
|
@ -20,8 +20,6 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
|||
/** Created by miriam on 02/08/2018. */
|
||||
public class ResultTagger implements Serializable {
|
||||
|
||||
private String trust = "0.8";
|
||||
|
||||
private boolean clearContext(Result result) {
|
||||
int tmp = result.getContext().size();
|
||||
List<Context> clist = result
|
||||
|
@ -71,10 +69,10 @@ public class ResultTagger implements Serializable {
|
|||
|
||||
// tagging for Subject
|
||||
final Set<String> subjects = new HashSet<>();
|
||||
Optional<List<StructuredProperty>> oresultsubj = Optional.ofNullable(result.getSubject());
|
||||
if (oresultsubj.isPresent()) {
|
||||
oresultsubj
|
||||
.get()
|
||||
|
||||
if (Objects.nonNull(result.getSubject())) {
|
||||
result
|
||||
.getSubject()
|
||||
.stream()
|
||||
.map(subject -> subject.getValue())
|
||||
.filter(StringUtils::isNotBlank)
|
||||
|
@ -90,15 +88,23 @@ public class ResultTagger implements Serializable {
|
|||
final Set<String> datasources = new HashSet<>();
|
||||
final Set<String> tmp = new HashSet<>();
|
||||
|
||||
Optional<List<Instance>> oresultinstance = Optional.ofNullable(result.getInstance());
|
||||
if (oresultinstance.isPresent()) {
|
||||
for (Instance i : oresultinstance.get()) {
|
||||
tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
|
||||
tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
|
||||
if (Objects.nonNull(result.getInstance())) {
|
||||
for (Instance i : result.getInstance()) {
|
||||
if (Objects.nonNull(i.getCollectedfrom())) {
|
||||
if (Objects.nonNull(i.getCollectedfrom().getKey())) {
|
||||
tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
|
||||
}
|
||||
}
|
||||
if (Objects.nonNull(i.getHostedby())) {
|
||||
if (Objects.nonNull(i.getHostedby().getKey())) {
|
||||
tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
oresultinstance
|
||||
.get()
|
||||
result
|
||||
.getInstance()
|
||||
.stream()
|
||||
.map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey()))
|
||||
.flatMap(p -> Stream.of(p.getFst(), p.getSnd()))
|
||||
|
@ -163,21 +169,24 @@ public class ResultTagger implements Serializable {
|
|||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_SUBJECT,
|
||||
CLASS_NAME_BULKTAG_SUBJECT));
|
||||
CLASS_NAME_BULKTAG_SUBJECT,
|
||||
TAGGING_TRUST));
|
||||
if (datasources.contains(c.getId()))
|
||||
dataInfoList
|
||||
.add(
|
||||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_DATASOURCE,
|
||||
CLASS_NAME_BULKTAG_DATASOURCE));
|
||||
CLASS_NAME_BULKTAG_DATASOURCE,
|
||||
TAGGING_TRUST));
|
||||
if (czenodo.contains(c.getId()))
|
||||
dataInfoList
|
||||
.add(
|
||||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_CZENODO,
|
||||
CLASS_NAME_BULKTAG_ZENODO));
|
||||
CLASS_NAME_BULKTAG_ZENODO,
|
||||
TAGGING_TRUST));
|
||||
}
|
||||
return c;
|
||||
})
|
||||
|
@ -203,21 +212,24 @@ public class ResultTagger implements Serializable {
|
|||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_SUBJECT,
|
||||
CLASS_NAME_BULKTAG_SUBJECT));
|
||||
CLASS_NAME_BULKTAG_SUBJECT,
|
||||
TAGGING_TRUST));
|
||||
if (datasources.contains(c))
|
||||
dataInfoList
|
||||
.add(
|
||||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_DATASOURCE,
|
||||
CLASS_NAME_BULKTAG_DATASOURCE));
|
||||
CLASS_NAME_BULKTAG_DATASOURCE,
|
||||
TAGGING_TRUST));
|
||||
if (czenodo.contains(c))
|
||||
dataInfoList
|
||||
.add(
|
||||
getDataInfo(
|
||||
BULKTAG_DATA_INFO_TYPE,
|
||||
CLASS_ID_CZENODO,
|
||||
CLASS_NAME_BULKTAG_ZENODO));
|
||||
CLASS_NAME_BULKTAG_ZENODO,
|
||||
TAGGING_TRUST));
|
||||
context.setDataInfo(dataInfoList);
|
||||
return context;
|
||||
})
|
||||
|
@ -228,11 +240,12 @@ public class ResultTagger implements Serializable {
|
|||
}
|
||||
|
||||
/**
 * Builds the {@link DataInfo} describing an inferred (bulk-tagging) association.
 * The returned object is flagged as inferred, carries the given inference provenance, a
 * provenance action obtained from {@code getQualifier(inference_class_id, inference_class_name)}
 * and the given trust value.
 *
 * NOTE(review): two parameter rows follow because this span renders a changed signature; the
 * row ending with {@code String trust} is the one consistent with the {@code setTrust(trust)}
 * call in the body - confirm against the actual source file.
 *
 * @param inference_provenance value stored as the inference provenance
 * @param inference_class_id class id passed to {@code getQualifier}
 * @param inference_class_name class name passed to {@code getQualifier}
 * @param trust value stored as the trust of the association
 * @return the populated DataInfo
 */
public static DataInfo getDataInfo(
|
||||
String inference_provenance, String inference_class_id, String inference_class_name) {
|
||||
String inference_provenance, String inference_class_id, String inference_class_name, String trust) {
|
||||
DataInfo di = new DataInfo();
|
||||
di.setInferred(true);
|
||||
di.setInferenceprovenance(inference_provenance);
|
||||
di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name));
|
||||
di.setTrust(trust);
|
||||
return di;
|
||||
}
|
||||
|
||||
|
|
|
@ -14,4 +14,6 @@ public class TaggingConstants {
|
|||
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
|
||||
public static final String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
|
||||
public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
|
||||
|
||||
public static final String TAGGING_TRUST = "0.8";
|
||||
}
|
||||
|
|
|
@ -96,27 +96,6 @@ public class PrepareDatasourceCountryAssociation {
|
|||
relation.createOrReplaceTempView("relation");
|
||||
organization.createOrReplaceTempView("organization");
|
||||
|
||||
// String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country "
|
||||
// + "FROM ( SELECT id "
|
||||
// + " FROM datasource "
|
||||
// + " WHERE (datainfo.deletedbyinference = false "
|
||||
// + whitelisted
|
||||
// + ") "
|
||||
// + getConstraintList("datasourcetype.classid = '", allowedtypes)
|
||||
// + ") d "
|
||||
// + "JOIN ( SELECT source, target "
|
||||
// + " FROM relation "
|
||||
// + " WHERE relclass = '"
|
||||
// + ModelConstants.IS_PROVIDED_BY
|
||||
// + "' "
|
||||
// + " AND datainfo.deletedbyinference = false ) rel "
|
||||
// + "ON d.id = rel.source "
|
||||
// + "JOIN (SELECT id, country "
|
||||
// + " FROM organization "
|
||||
// + " WHERE datainfo.deletedbyinference = false "
|
||||
// + " AND length(country.classid) > 0) o "
|
||||
// + "ON o.id = rel.target";
|
||||
|
||||
String query = "SELECT source dataSourceId, " +
|
||||
"named_struct('classid', country.classid, 'classname', country.classname) country " +
|
||||
"FROM datasource d " +
|
||||
|
@ -125,7 +104,7 @@ public class PrepareDatasourceCountryAssociation {
|
|||
"JOIN organization o " +
|
||||
"ON o.id = rel.target " +
|
||||
"WHERE rel.datainfo.deletedbyinference = false " +
|
||||
"and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" +
|
||||
"and lower(rel.relclass) = '" + ModelConstants.IS_PROVIDED_BY.toLowerCase() + "'" +
|
||||
"and o.datainfo.deletedbyinference = false " +
|
||||
"and length(o.country.classid) > 0 " +
|
||||
"and (" + allowed + " or " + whitelisted + ")";
|
||||
|
|
|
@ -102,15 +102,17 @@ public class PrepareResultOrcidAssociationStep1 {
|
|||
+ " FROM result "
|
||||
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
|
||||
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
|
||||
+ " WHERE MyP.qualifier.classid = 'ORCID') tmp "
|
||||
+ " WHERE lower(MyP.qualifier.classid) = 'orcid') tmp "
|
||||
+ " GROUP BY id) r_t "
|
||||
+ " JOIN ("
|
||||
+ " SELECT source, target "
|
||||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ getConstraintList(" relclass = '", allowedsemrel)
|
||||
+ getConstraintList(" lower(relclass) = '", allowedsemrel)
|
||||
+ " ) rel_rel "
|
||||
+ " ON source = id";
|
||||
|
||||
log.info("executedQuery: {}", query);
|
||||
spark
|
||||
.sql(query)
|
||||
.as(Encoders.bean(ResultOrcidList.class))
|
||||
|
|
|
@ -85,8 +85,8 @@ public class PrepareProjectResultsAssociation {
|
|||
String resproj_relation_query = "SELECT source, target "
|
||||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ " AND relClass = '"
|
||||
+ ModelConstants.IS_PRODUCED_BY
|
||||
+ " AND lower(relClass) = '"
|
||||
+ ModelConstants.IS_PRODUCED_BY.toLowerCase()
|
||||
+ "'";
|
||||
|
||||
Dataset<Row> resproj_relation = spark.sql(resproj_relation_query);
|
||||
|
@ -98,7 +98,7 @@ public class PrepareProjectResultsAssociation {
|
|||
+ " FROM (SELECT source, target "
|
||||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ getConstraintList(" relClass = '", allowedsemrel)
|
||||
+ getConstraintList(" lower(relClass) = '", allowedsemrel)
|
||||
+ " ) r1"
|
||||
+ " JOIN resproj_relation r2 "
|
||||
+ " ON r1.source = r2.source "
|
||||
|
|
|
@ -76,14 +76,14 @@ public class PrepareResultCommunitySet {
|
|||
+ "FROM (SELECT source, target "
|
||||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ " AND relClass = '"
|
||||
+ ModelConstants.HAS_AUTHOR_INSTITUTION
|
||||
+ " AND lower(relClass) = '"
|
||||
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
|
||||
+ "') result_organization "
|
||||
+ "LEFT JOIN (SELECT source, collect_set(target) org_set "
|
||||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ " AND relClass = '"
|
||||
+ ModelConstants.MERGES
|
||||
+ " AND lower(relClass) = '"
|
||||
+ ModelConstants.MERGES.toLowerCase()
|
||||
+ "' "
|
||||
+ " GROUP BY source) organization_organization "
|
||||
+ "ON result_organization.target = organization_organization.source ";
|
||||
|
|
|
@ -144,8 +144,8 @@ public class PrepareResultCommunitySetStep1 {
|
|||
String resultContextQuery = String
|
||||
.format(
|
||||
RESULT_CONTEXT_QUERY_TEMPLATE,
|
||||
getConstraintList(" co.id = '", communityIdList),
|
||||
getConstraintList(" relClass = '", allowedsemrel));
|
||||
getConstraintList(" lower(co.id) = '", communityIdList),
|
||||
getConstraintList(" lower(relClass) = '", allowedsemrel));
|
||||
|
||||
Dataset<Row> result_context = spark.sql(resultContextQuery);
|
||||
result_context.createOrReplaceTempView("result_context");
|
||||
|
|
|
@ -91,8 +91,8 @@ public class PrepareResultInstRepoAssociation {
|
|||
+ "AND datainfo.deletedbyinference = false ) d "
|
||||
+ "JOIN ( SELECT source, target "
|
||||
+ "FROM relation "
|
||||
+ "WHERE relclass = '"
|
||||
+ ModelConstants.IS_PROVIDED_BY
|
||||
+ "WHERE lower(relclass) = '"
|
||||
+ ModelConstants.IS_PROVIDED_BY.toLowerCase()
|
||||
+ "' "
|
||||
+ "AND datainfo.deletedbyinference = false ) rel "
|
||||
+ "ON d.id = rel.source ";
|
||||
|
@ -111,8 +111,8 @@ public class PrepareResultInstRepoAssociation {
|
|||
String query = "Select source resultId, collect_set(target) organizationSet "
|
||||
+ "from relation "
|
||||
+ "where datainfo.deletedbyinference = false "
|
||||
+ "and relClass = '"
|
||||
+ ModelConstants.HAS_AUTHOR_INSTITUTION
|
||||
+ "and lower(relClass) = '"
|
||||
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
|
||||
+ "' "
|
||||
+ "group by source";
|
||||
|
||||
|
|
|
@ -266,7 +266,6 @@
|
|||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -298,7 +297,6 @@
|
|||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -330,7 +328,6 @@
|
|||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -362,7 +359,6 @@
|
|||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -176,7 +176,6 @@
|
|||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
|
@ -206,7 +205,6 @@
|
|||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
|
@ -236,7 +234,6 @@
|
|||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
|
@ -266,7 +263,6 @@
|
|||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
|
|
|
@ -106,12 +106,6 @@
|
|||
<subject>aqua</subject>
|
||||
<subject>sea</subject>
|
||||
</subjects>
|
||||
<providers>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</providers>
|
||||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="aginfra">
|
||||
|
@ -163,7 +157,11 @@
|
|||
<zenodocommunities/>
|
||||
</community>
|
||||
<community id="clarin">
|
||||
<oacommunity>oac_clarin</oacommunity>
|
||||
<zenodocommunities>
|
||||
<zenodocommunity>
|
||||
<zenodoid>oac_clarin</zenodoid>
|
||||
</zenodocommunity>
|
||||
</zenodocommunities>
|
||||
<subjects/>
|
||||
<providers>
|
||||
<datasource>
|
||||
|
|
|
@ -257,6 +257,9 @@
|
|||
<zenodoid>bodhgaya</zenodoid>
|
||||
<selcriteria/>
|
||||
</zenodocommunity>
|
||||
<zenodocommunity>
|
||||
<zenodoid>oac_dh-ch</zenodoid>
|
||||
</zenodocommunity>
|
||||
</zenodocommunities>
|
||||
<organizations/>
|
||||
</community>
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue