Merge remote-tracking branch 'upstream/master' into orcid-no-doi

pull/42/head
Enrico Ottonello 4 years ago
commit eb8c9b2348

@ -19,7 +19,7 @@
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_switch_statement" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_enum_constant_declaration" value="common_lines"/>

@ -87,6 +87,11 @@
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
</dependency>
</dependencies>
</project>

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.common.api;
import java.io.IOException;
import java.io.InputStream;
import okhttp3.MediaType;
import okhttp3.RequestBody;
import okhttp3.internal.Util;
import okio.BufferedSink;
import okio.Okio;
import okio.Source;
public class InputStreamRequestBody extends RequestBody {
private InputStream inputStream;
private MediaType mediaType;
private long lenght;
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
return new InputStreamRequestBody(inputStream, mediaType, len);
}
private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
this.inputStream = inputStream;
this.mediaType = mediaType;
this.lenght = len;
}
@Override
public MediaType contentType() {
return mediaType;
}
@Override
public long contentLength() {
return lenght;
}
@Override
public void writeTo(BufferedSink sink) throws IOException {
Source source = null;
try {
source = Okio.source(inputStream);
sink.writeAll(source);
} finally {
Util.closeQuietly(source);
}
}
}

@ -0,0 +1,8 @@
package eu.dnetlib.dhp.common.api;
public class MissingConceptDoiException extends Throwable {
public MissingConceptDoiException(String message) {
super(message);
}
}

@ -0,0 +1,264 @@
package eu.dnetlib.dhp.common.api;
import java.io.*;
import java.io.IOException;
import com.google.gson.Gson;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
import okhttp3.*;
public class ZenodoAPIClient implements Serializable {
String urlString;
String bucket;
String deposition_id;
String access_token;
public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
public String getUrlString() {
return urlString;
}
public void setUrlString(String urlString) {
this.urlString = urlString;
}
public String getBucket() {
return bucket;
}
public void setBucket(String bucket) {
this.bucket = bucket;
}
public void setDeposition_id(String deposition_id) {
this.deposition_id = deposition_id;
}
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
this.urlString = urlString;
this.access_token = access_token;
}
/**
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
* @return response code
* @throws IOException
*/
public int newDeposition() throws IOException {
String json = "{}";
OkHttpClient httpClient = new OkHttpClient();
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, json);
Request request = new Request.Builder()
.url(urlString)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
// Get response body
json = response.body().string();
ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class);
this.bucket = newSubmission.getLinks().getBucket();
this.deposition_id = newSubmission.getId();
return response.code();
}
}
/**
* Upload files in Zenodo.
* @param is the inputStream for the file to upload
* @param file_name the name of the file as it will appear on Zenodo
* @param len the size of the file
* @return the response code
*/
public int uploadIS(InputStream is, String file_name, long len) throws IOException {
OkHttpClient httpClient = new OkHttpClient();
Request request = new Request.Builder()
.url(bucket + "/" + file_name)
.addHeader("Content-Type", "application/zip") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.code();
}
}
/**
* Associates metadata information to the current deposition
* @param metadata the metadata
* @return response code
* @throws IOException
*/
public int sendMretadata(String metadata) throws IOException {
OkHttpClient httpClient = new OkHttpClient();
RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, metadata);
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.put(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.code();
}
}
/**
* To publish the current deposition. It works for both new deposition or new version of an old deposition
* @return response code
* @throws IOException
*/
public int publish() throws IOException {
String json = "{}";
OkHttpClient httpClient = new OkHttpClient();
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/publish")
.addHeader("Authorization", "Bearer " + access_token)
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.code();
}
}
/**
* To create a new version of an already published deposition.
* It sets the deposition_id and the bucket to be used for the new version.
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is
* the last part of the url for the DOI Zenodo suggests to use to cite all versions:
* DOI: 10.xxx/zenodo.656930 concept_rec_id = 656930
* @return response code
* @throws IOException
* @throws MissingConceptDoiException
*/
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
setDepositionId(concept_rec_id);
String json = "{}";
OkHttpClient httpClient = new OkHttpClient();
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/newversion")
.addHeader("Authorization", "Bearer " + access_token)
.post(RequestBody.create(MEDIA_TYPE_JSON, json))
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
bucket = getBucket(latest_draft);
return response.code();
}
}
private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
for (ZenodoModel zm : zenodoModelList) {
if (zm.getConceptrecid().equals(concept_rec_id)) {
deposition_id = zm.getId();
return;
}
}
throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
}
private String getPrevDepositions() throws IOException {
OkHttpClient httpClient = new OkHttpClient();
Request request = new Request.Builder()
.url(urlString)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.body().string();
}
}
private String getBucket(String url) throws IOException {
OkHttpClient httpClient = new OkHttpClient();
Request request = new Request.Builder()
.url(url)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
// Get response body
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
return zenodoModel.getLinks().getBucket();
}
}
}

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.common.api.zenodo;
public class Community {
private String identifier;
public String getIdentifier() {
return identifier;
}
public void setIdentifier(String identifier) {
this.identifier = identifier;
}
}

@ -0,0 +1,47 @@
package eu.dnetlib.dhp.common.api.zenodo;
public class Creator {
private String affiliation;
private String name;
private String orcid;
public String getAffiliation() {
return affiliation;
}
public void setAffiliation(String affiliation) {
this.affiliation = affiliation;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getOrcid() {
return orcid;
}
public void setOrcid(String orcid) {
this.orcid = orcid;
}
public static Creator newInstance(String name, String affiliation, String orcid) {
Creator c = new Creator();
if (!(name == null)) {
c.name = name;
}
if (!(affiliation == null)) {
c.affiliation = affiliation;
}
if (!(orcid == null)) {
c.orcid = orcid;
}
return c;
}
}

@ -0,0 +1,58 @@
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
import net.minidev.json.annotate.JsonIgnore;
public class File implements Serializable {
private String checksum;
private String filename;
private long filesize;
private String id;
@JsonIgnore
// private Links links;
public String getChecksum() {
return checksum;
}
public void setChecksum(String checksum) {
this.checksum = checksum;
}
public String getFilename() {
return filename;
}
public void setFilename(String filename) {
this.filename = filename;
}
public long getFilesize() {
return filesize;
}
public void setFilesize(long filesize) {
this.filesize = filesize;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
// @JsonIgnore
// public Links getLinks() {
// return links;
// }
//
// @JsonIgnore
// public void setLinks(Links links) {
// this.links = links;
// }
}

@ -0,0 +1,23 @@
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
public class Grant implements Serializable {
private String id;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public static Grant newInstance(String id) {
Grant g = new Grant();
g.id = id;
return g;
}
}

@ -0,0 +1,92 @@
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
public class Links implements Serializable {
private String bucket;
private String discard;
private String edit;
private String files;
private String html;
private String latest_draft;
private String latest_draft_html;
private String publish;
private String self;
public String getBucket() {
return bucket;
}
public void setBucket(String bucket) {
this.bucket = bucket;
}
public String getDiscard() {
return discard;
}
public void setDiscard(String discard) {
this.discard = discard;
}
public String getEdit() {
return edit;
}
public void setEdit(String edit) {
this.edit = edit;
}
public String getFiles() {
return files;
}
public void setFiles(String files) {
this.files = files;
}
public String getHtml() {
return html;
}
public void setHtml(String html) {
this.html = html;
}
public String getLatest_draft() {
return latest_draft;
}
public void setLatest_draft(String latest_draft) {
this.latest_draft = latest_draft;
}
public String getLatest_draft_html() {
return latest_draft_html;
}
public void setLatest_draft_html(String latest_draft_html) {
this.latest_draft_html = latest_draft_html;
}
public String getPublish() {
return publish;
}
public void setPublish(String publish) {
this.publish = publish;
}
public String getSelf() {
return self;
}
public void setSelf(String self) {
this.self = self;
}
}

@ -0,0 +1,153 @@
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
import java.util.List;
public class Metadata implements Serializable {
private String access_right;
private List<Community> communities;
private List<Creator> creators;
private String description;
private String doi;
private List<Grant> grants;
private List<String> keywords;
private String language;
private String license;
private PrereserveDoi prereserve_doi;
private String publication_date;
private List<String> references;
private List<RelatedIdentifier> related_identifiers;
private String title;
private String upload_type;
private String version;
public String getUpload_type() {
return upload_type;
}
public void setUpload_type(String upload_type) {
this.upload_type = upload_type;
}
public String getVersion() {
return version;
}
public void setVersion(String version) {
this.version = version;
}
public String getAccess_right() {
return access_right;
}
public void setAccess_right(String access_right) {
this.access_right = access_right;
}
public List<Community> getCommunities() {
return communities;
}
public void setCommunities(List<Community> communities) {
this.communities = communities;
}
public List<Creator> getCreators() {
return creators;
}
public void setCreators(List<Creator> creators) {
this.creators = creators;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
public String getDoi() {
return doi;
}
public void setDoi(String doi) {
this.doi = doi;
}
public List<Grant> getGrants() {
return grants;
}
public void setGrants(List<Grant> grants) {
this.grants = grants;
}
public List<String> getKeywords() {
return keywords;
}
public void setKeywords(List<String> keywords) {
this.keywords = keywords;
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = language;
}
public String getLicense() {
return license;
}
public void setLicense(String license) {
this.license = license;
}
public PrereserveDoi getPrereserve_doi() {
return prereserve_doi;
}
public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
this.prereserve_doi = prereserve_doi;
}
public String getPublication_date() {
return publication_date;
}
public void setPublication_date(String publication_date) {
this.publication_date = publication_date;
}
public List<String> getReferences() {
return references;
}
public void setReferences(List<String> references) {
this.references = references;
}
public List<RelatedIdentifier> getRelated_identifiers() {
return related_identifiers;
}
public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
this.related_identifiers = related_identifiers;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
}

@ -0,0 +1,25 @@
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
public class PrereserveDoi implements Serializable {
private String doi;
private String recid;
public String getDoi() {
return doi;
}
public void setDoi(String doi) {
this.doi = doi;
}
public String getRecid() {
return recid;
}
public void setRecid(String recid) {
this.recid = recid;
}
}

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
public class RelatedIdentifier implements Serializable {
private String identifier;
private String relation;
private String resource_type;
private String scheme;
public String getIdentifier() {
return identifier;
}
public void setIdentifier(String identifier) {
this.identifier = identifier;
}
public String getRelation() {
return relation;
}
public void setRelation(String relation) {
this.relation = relation;
}
public String getResource_type() {
return resource_type;
}
public void setResource_type(String resource_type) {
this.resource_type = resource_type;
}
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
}

@ -0,0 +1,118 @@
package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable;
import java.util.List;
public class ZenodoModel implements Serializable {
private String conceptrecid;
private String created;
private List<File> files;
private String id;
private Links links;
private Metadata metadata;
private String modified;
private String owner;
private String record_id;
private String state;
private boolean submitted;
private String title;
public String getConceptrecid() {
return conceptrecid;
}
public void setConceptrecid(String conceptrecid) {
this.conceptrecid = conceptrecid;
}
public String getCreated() {
return created;
}
public void setCreated(String created) {
this.created = created;
}
public List<File> getFiles() {
return files;
}
public void setFiles(List<File> files) {
this.files = files;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public Links getLinks() {
return links;
}
public void setLinks(Links links) {
this.links = links;
}
public Metadata getMetadata() {
return metadata;
}
public void setMetadata(Metadata metadata) {
this.metadata = metadata;
}
public String getModified() {
return modified;
}
public void setModified(String modified) {
this.modified = modified;
}
public String getOwner() {
return owner;
}
public void setOwner(String owner) {
this.owner = owner;
}
public String getRecord_id() {
return record_id;
}
public void setRecord_id(String record_id) {
this.record_id = record_id;
}
public String getState() {
return state;
}
public void setState(String state) {
this.state = state;
}
public boolean isSubmitted() {
return submitted;
}
public void setSubmitted(boolean submitted) {
this.submitted = submitted;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
}

@ -0,0 +1,7 @@
package eu.dnetlib.dhp.common.api.zenodo;
import java.util.ArrayList;
public class ZenodoModelList extends ArrayList<ZenodoModel> {
}

@ -1,15 +1,22 @@
package eu.dnetlib.dhp.utils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.util.Map;
import javax.xml.ws.BindingProvider;
import org.apache.cxf.jaxws.JaxWsProxyFactoryBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class ISLookupClientFactory {
private static final Log log = LogFactory.getLog(ISLookupClientFactory.class);
private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
private static int requestTimeout = 60000 * 10;
private static int connectTimeout = 60000 * 10;
public static ISLookUpService getLookUpService(final String isLookupUrl) {
return getServiceStub(ISLookUpService.class, isLookupUrl);
@ -21,6 +28,25 @@ public class ISLookupClientFactory {
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
jaxWsProxyFactory.setServiceClass(clazz);
jaxWsProxyFactory.setAddress(endpoint);
return (T) jaxWsProxyFactory.create();
final T service = (T) jaxWsProxyFactory.create();
if (service instanceof BindingProvider) {
log
.info(
"setting timeouts for {} to requestTimeout: {}, connectTimeout: {}",
BindingProvider.class.getName(), requestTimeout, connectTimeout);
Map<String, Object> requestContext = ((BindingProvider) service).getRequestContext();
requestContext.put("com.sun.xml.internal.ws.request.timeout", requestTimeout);
requestContext.put("com.sun.xml.internal.ws.connect.timeout", connectTimeout);
requestContext.put("com.sun.xml.ws.request.timeout", requestTimeout);
requestContext.put("com.sun.xml.ws.connect.timeout", connectTimeout);
requestContext.put("javax.xml.ws.client.receiveTimeout", requestTimeout);
requestContext.put("javax.xml.ws.client.connectionTimeout", connectTimeout);
}
return service;
}
}

@ -0,0 +1,85 @@
package eu.dnetlib.dhp.common.api;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@Disabled
public class ZenodoAPIClientTest {
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
private final String ACCESS_TOKEN = "";
private final String CONCEPT_REC_ID = "657113";
@Test
public void testNewDeposition() throws IOException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
Assertions.assertEquals(201, client.newDeposition());
File file = new File(getClass()
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
.getPath());
InputStream is = new FileInputStream(file);
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
Assertions.assertEquals(200, client.sendMretadata(metadata));
Assertions.assertEquals(202, client.publish());
}
@Test
public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
File file = new File(getClass()
.getResource("/eu/dnetlib/dhp/common/api/newVersion")
.getPath());
InputStream is = new FileInputStream(file);
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
Assertions.assertEquals(202, client.publish());
}
@Test
public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
File file = new File(getClass()
.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
.getPath());
InputStream is = new FileInputStream(file);
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
Assertions.assertEquals(202, client.publish());
}
}

@ -0,0 +1 @@
{"metadata":{"access_right":"open","communities":[{"identifier":"openaire-research-graph"}],"creators":[{"affiliation":"ISTI - CNR","name":"Bardi, Alessia","orcid":"0000-0002-1112-1292"},{"affiliation":"eifl", "name":"Kuchma, Iryna"},{"affiliation":"BIH", "name":"Brobov, Evgeny"},{"affiliation":"GIDIF RBM", "name":"Truccolo, Ivana"},{"affiliation":"unesp", "name":"Monteiro, Elizabete"},{"affiliation":"und", "name":"Casalegno, Carlotta"},{"affiliation":"CARL ABRC", "name":"Clary, Erin"},{"affiliation":"The University of Edimburgh", "name":"Romanowski, Andrew"},{"affiliation":"ISTI - CNR", "name":"Pavone, Gina"},{"affiliation":"ISTI - CNR", "name":"Artini, Michele"},{"affiliation":"ISTI - CNR","name":"Atzori, Claudio","orcid":"0000-0001-9613-6639"},{"affiliation":"University of Bielefeld","name":"Bäcker, Amelie","orcid":"0000-0001-6015-2063"},{"affiliation":"ISTI - CNR","name":"Baglioni, Miriam","orcid":"0000-0002-2273-9004"},{"affiliation":"University of Bielefeld","name":"Czerniak, Andreas","orcid":"0000-0003-3883-4169"},{"affiliation":"ISTI - CNR","name":"De Bonis, Michele"},{"affiliation":"Athena Research and Innovation Centre","name":"Dimitropoulos, Harry"},{"affiliation":"Athena Research and Innovation Centre","name":"Foufoulas, Ioannis"},{"affiliation":"University of Warsaw","name":"Horst, Marek"},{"affiliation":"Athena Research and Innovation Centre","name":"Iatropoulou, Katerina"},{"affiliation":"University of Warsaw","name":"Jacewicz, Przemyslaw"},{"affiliation":"Athena Research and Innovation Centre","name":"Kokogiannaki, Argiro", "orcid":"0000-0002-3880-0244"},{"affiliation":"ISTI - CNR","name":"La Bruzzo, Sandro","orcid":"0000-0003-2855-1245"},{"affiliation":"ISTI - CNR","name":"Lazzeri, Emma"},{"affiliation":"University of Bielefeld","name":"Löhden, Aenne"},{"affiliation":"ISTI - CNR","name":"Manghi, Paolo","orcid":"0000-0001-7291-3210"},{"affiliation":"ISTI - CNR","name":"Mannocci, Andrea","orcid":"0000-0002-5193-7851"},{"affiliation":"Athena Research and Innovation Center","name":"Manola, Natalia"},{"affiliation":"ISTI - CNR","name":"Ottonello, Enrico"},{"affiliation":"University of Bielefeld","name":"Shirrwagen, Jochen"}],"description":"\\u003cp\\u003eThis dump provides access to the metadata records of publications, research data, software and projects that may be relevant to the Corona Virus Disease (COVID-19) fight. The dump contains records of the OpenAIRE COVID-19 Gateway (https://covid-19.openaire.eu/), identified via full-text mining and inference techniques applied to the OpenAIRE Research Graph (https://explore.openaire.eu/). The Graph is one of the largest Open Access collections of metadata records and links between publications, datasets, software, projects, funders, and organizations, aggregating 12,000+ scientific data sources world-wide, among which the Covid-19 data sources Zenodo COVID-19 Community, WHO (World Health Organization), BIP! FInder for COVID-19, Protein Data Bank, Dimensions, scienceOpen, and RSNA. \\u003cp\\u003eThe dump consists of a gzip file containing one json per line. Each json is compliant to the schema available at https://doi.org/10.5281/zenodo.3974226\\u003c/p\\u003e ","title":"OpenAIRE Covid-19 publications, datasets, software and projects metadata.","upload_type":"dataset","version":"1.0"}}

@ -0,0 +1 @@
This is a test for a new version of an old deposition

@ -0,0 +1,2 @@
This is a test for a new version of an old deposition. This should replace the other new version. I expect to have only two
files in the deposition

@ -7,6 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class ModelConstants {
public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
public static final String DNET_ACCESS_MODES = "dnet:access_modes";

@ -79,6 +79,15 @@ public class ModelSupport {
entityIdPrefix.put("result", "50");
}
public static final Map<String, String> idPrefixEntity = Maps.newHashMap();
static {
idPrefixEntity.put("10", "datasource");
idPrefixEntity.put("20", "organization");
idPrefixEntity.put("40", "project");
idPrefixEntity.put("50", "result");
}
public static final Map<String, RelationInverse> relationInverseMap = Maps.newHashMap();
static {

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* Used to refer to the Article Processing Charge information. Not dumped in this release. It contains two parameters: -
* currency of type String to store the currency of the APC - amount of type String to stores the charged amount
*/
public class APC implements Serializable {
private String currency;
private String amount;
public String getCurrency() {
return currency;
}
public void setCurrency(String currency) {
this.currency = currency;
}
public String getAmount() {
return amount;
}
public void setAmount(String amount) {
this.amount = amount;
}
}

@ -0,0 +1,31 @@
package eu.dnetlib.dhp.schema.dump.oaf;
/**
* AccessRight. Used to represent the result access rights. It extends the eu.dnet.lib.dhp.schema.dump.oaf.Qualifier
* element with a parameter scheme of type String to store the scheme. Values for this element are found against the
* COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get
* the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the
* COAR code will be used to set the label parameter. The scheme value will always be the one referring to the COAR
* access right scheme
*/
public class AccessRight extends Qualifier {
private String scheme;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public static AccessRight newInstance(String code, String label, String scheme) {
AccessRight ar = new AccessRight();
ar.setCode(code);
ar.setLabel(label);
ar.setScheme(scheme);
return ar;
}
}

@ -0,0 +1,73 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.List;
/**
* Used to represent the generic author of the result. It has six parameters: - name of type String to store the given
* name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of
* type String to store the family name of the author. The value for this parameter corresponds to
* eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for
* this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank on
* the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author
* rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the
* moment only ORCID identifiers will be dumped. - The id element is instantiated by using the following values in the
* eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is
* instantiated only if the dataInfo is set for the pid in the result to be dumped. The provenance element is
* instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: *
* dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust
*/
public class Author implements Serializable {
private String fullname;
private String name;
private String surname;
private Integer rank;
private Pid pid;
public String getFullname() {
return fullname;
}
public void setFullname(String fullname) {
this.fullname = fullname;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getSurname() {
return surname;
}
public void setSurname(String surname) {
this.surname = surname;
}
public Integer getRank() {
return rank;
}
public void setRank(Integer rank) {
this.rank = rank;
}
public Pid getPid() {
return pid;
}
public void setPid(Pid pid) {
this.pid = pid;
}
}

@ -0,0 +1,136 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.Objects;
/**
* To store information about the conference or journal where the result has been presented or published. It contains
* eleven parameters: - name of type String to store the name of the journal or conference. It corresponds to the
* parameter name of eu.dnetlib.dhp.schema.oaf.Journal - issnPrinted ot type String to store the journal printed issn.
* It corresponds to the parameter issnPrinted of eu.dnetlib.dhp.schema.oaf.Journal - issnOnline of type String to store
* the journal online issn. It corresponds to the parameter issnOnline of eu.dnetlib.dhp.schema.oaf.Journal -
* issnLinking of type String to store the journal linking issn. It corresponds to the parameter issnLinking of
* eu.dnetlib.dhp.schema.oaf.Journal - ep of type String to store the end page. It corresponds to the parameter ep of
* eu.dnetlib.dhp.schema.oaf.Journal - iss of type String to store the journal issue. It corresponds to the parameter
* iss of eu.dnetlib.dhp.schema.oaf.Journal - sp of type String to store the start page. It corresponds to the parameter
* sp of eu.dnetlib.dhp.schema.oaf.Journal - vol of type String to store the Volume. It corresponds to the parameter vol
* of eu.dnetlib.dhp.schema.oaf.Journal - edition of type String to store the edition of the journal or conference
* proceeding. It corresponds to the parameter edition of eu.dnetlib.dhp.schema.oaf.Journal - conferenceplace of type
* String to store the place of the conference. It corresponds to the parameter conferenceplace of
* eu.dnetlib.dhp.schema.oaf.Journal - conferencedate of type String to store the date of the conference. It corresponds
* to the parameter conferencedate of eu.dnetlib.dhp.schema.oaf.Journal
*/
public class Container implements Serializable {
private String name;
private String issnPrinted;
private String issnOnline;
private String issnLinking;
private String ep;
private String iss;
private String sp;
private String vol;
private String edition;
private String conferenceplace;
private String conferencedate;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getIssnPrinted() {
return issnPrinted;
}
public void setIssnPrinted(String issnPrinted) {
this.issnPrinted = issnPrinted;
}
public String getIssnOnline() {
return issnOnline;
}
public void setIssnOnline(String issnOnline) {
this.issnOnline = issnOnline;
}
public String getIssnLinking() {
return issnLinking;
}
public void setIssnLinking(String issnLinking) {
this.issnLinking = issnLinking;
}
public String getEp() {
return ep;
}
public void setEp(String ep) {
this.ep = ep;
}
public String getIss() {
return iss;
}
public void setIss(String iss) {
this.iss = iss;
}
public String getSp() {
return sp;
}
public void setSp(String sp) {
this.sp = sp;
}
public String getVol() {
return vol;
}
public void setVol(String vol) {
this.vol = vol;
}
public String getEdition() {
return edition;
}
public void setEdition(String edition) {
this.edition = edition;
}
public String getConferenceplace() {
return conferenceplace;
}
public void setConferenceplace(String conferenceplace) {
this.conferenceplace = conferenceplace;
}
public String getConferencedate() {
return conferencedate;
}
public void setConferencedate(String conferencedate) {
this.conferencedate = conferencedate;
}
}

@ -0,0 +1,38 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* To represent the information described by a scheme and a value in that scheme (i.e. pid). It has two parameters: -
* scheme of type String to store the scheme - value of type String to store the value in that scheme
*/
public class ControlledField implements Serializable {
private String scheme;
private String value;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static ControlledField newInstance(String scheme, String value) {
ControlledField cf = new ControlledField();
cf.setScheme(scheme);
cf.setValue(value);
return cf;
}
}

@ -0,0 +1,37 @@
package eu.dnetlib.dhp.schema.dump.oaf;
/**
* Represents the country associated to this result. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a
* provenance parameter of type eu.dnetlib.dhp.schema.dumo.oaf.Provenance. The country in not mapped if its value in the
* result reprensented in the internal format is Unknown. The value for this element correspond to: - code corresponds
* to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of
* eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be
* dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with
* datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust
*/
public class Country extends Qualifier {
private Provenance provenance;
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public static Country newInstance(String code, String label, Provenance provenance) {
Country c = new Country();
c.setProvenance(provenance);
c.setCode(code);
c.setLabel(label);
return c;
}
public static Country newInstance(String code, String label, String provenance, String trust) {
return newInstance(code, label, Provenance.newInstance(provenance, trust));
}
}

@ -0,0 +1,36 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
public class Funder implements Serializable {
private String shortName;
private String name;
private String jurisdiction;
public String getJurisdiction() {
return jurisdiction;
}
public void setJurisdiction(String jurisdiction) {
this.jurisdiction = jurisdiction;
}
public String getShortName() {
return shortName;
}
public void setShortName(String shortName) {
this.shortName = shortName;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
}

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* Represents the geolocation information. It has three parameters: - point of type String to store the point
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box ot type String to store the box
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place
*/
public class GeoLocation implements Serializable {
private String point;
private String box;
private String place;
public String getPoint() {
return point;
}
public void setPoint(String point) {
this.point = point;
}
public String getBox() {
return box;
}
public void setBox(String box) {
this.box = box;
}
public String getPlace() {
return place;
}
public void setPlace(String place) {
this.place = place;
}
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
}
}

@ -0,0 +1,107 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.List;
/**
* Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published
* versions are two manifestations of the same research result. It has the following parameters: - license of type
* String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be
* dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. -
* type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
* (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - hostedby of
* type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance can be
* viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and - key corresponds
* to hostedby.key - value corresponds to hostedby.value - url of type List<String> list of locations where the instance
* is accessible. It corresponds to url of the instance to be dumped - collectedfrom of type
* eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been
* collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to
* collectedfrom.key - value corresponds to collectedfrom.value - publicationdate of type String to store the
* publication date of the instance ;// dateofacceptance; - refereed of type String to store information abour tthe
* review status of the instance. Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed'. It corresponds to
* refereed.classname of the instance to be dumped
*/
public class Instance implements Serializable {
private String license;
private AccessRight accessright;
private String type;
private KeyValue hostedby;
private List<String> url;
private KeyValue collectedfrom;
private String publicationdate;// dateofacceptance;
private String refereed; // peer-review status
public String getLicense() {
return license;
}
public void setLicense(String license) {
this.license = license;
}
public AccessRight getAccessright() {
return accessright;
}
public void setAccessright(AccessRight accessright) {
this.accessright = accessright;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public KeyValue getHostedby() {
return hostedby;
}
public void setHostedby(KeyValue hostedby) {
this.hostedby = hostedby;
}
public List<String> getUrl() {
return url;
}
public void setUrl(List<String> url) {
this.url = url;
}
public KeyValue getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(KeyValue collectedfrom) {
this.collectedfrom = collectedfrom;
}
public String getPublicationdate() {
return publicationdate;
}
public void setPublicationdate(String publicationdate) {
this.publicationdate = publicationdate;
}
public String getRefereed() {
return refereed;
}
public void setRefereed(String refereed) {
this.refereed = refereed;
}
}

@ -0,0 +1,48 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* To represent the information described by a key and a value. It has two parameters: - key to store the key (generally
* the OpenAIRE id for some entity) - value to store the value (generally the OpenAIRE name for the key)
*/
public class KeyValue implements Serializable {
private String key;
private String value;
public String getKey() {
return key;
}
public void setKey(String key) {
this.key = key;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static KeyValue newInstance(String key, String value) {
KeyValue inst = new KeyValue();
inst.key = key;
inst.value = value;
return inst;
}
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
}
}

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* To represent the generic persistent identifier. It has two parameters: - id of type
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the scheme and value of the Persistent Identifier. -
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information
*/
public class Pid implements Serializable {
private ControlledField id;
private Provenance provenance;
public ControlledField getId() {
return id;
}
public void setId(ControlledField pid) {
this.id = pid;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public static Pid newInstance(ControlledField pid, Provenance provenance) {
Pid p = new Pid();
p.id = pid;
p.provenance = provenance;
return p;
}
public static Pid newInstance(ControlledField pid) {
Pid p = new Pid();
p.id = pid;
return p;
}
}

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
public class Project implements Serializable {
protected String id;// OpenAIRE id
protected String code;
protected String acronym;
protected String title;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
}

@ -0,0 +1,41 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* Indicates the process that produced (or provided) the information, and the trust associated to the information. It
* has two parameters: - provenance of type String to store the provenance of the information, - trust of type String to
* store the trust associated to the information
*/
public class Provenance implements Serializable {
private String provenance;
private String trust;
public String getProvenance() {
return provenance;
}
public void setProvenance(String provenance) {
this.provenance = provenance;
}
public String getTrust() {
return trust;
}
public void setTrust(String trust) {
this.trust = trust;
}
public static Provenance newInstance(String provenance, String trust) {
Provenance p = new Provenance();
p.provenance = provenance;
p.trust = trust;
return p;
}
public String toString() {
return provenance + trust;
}
}

@ -0,0 +1,42 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* To represent the information described by a code and a value It has two parameters: - code to store the code
* (generally the classid of the eu.dnetlib.dhp.schema.oaf.Qualifier element) - label to store the label (generally the
* classname of the eu.dnetlib.dhp.schema.oaf.Qualifier element
*/
public class Qualifier implements Serializable {
private String code; // the classid in the Qualifier
private String label; // the classname in the Qualifier
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getLabel() {
return label;
}
public void setLabel(String label) {
this.label = label;
}
public static Qualifier newInstance(String code, String value) {
Qualifier qualifier = new Qualifier();
qualifier.setCode(code);
qualifier.setLabel(value);
return qualifier;
}
}

@ -0,0 +1,391 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
/**
* To represent the dumped result. It will be extended in the dump for Research Communities - Research
* Initiative/Infrastructures. It has the following parameters: - author of type
* List<eu.dnetlib.dhpschema.dump.oaf.Author> to describe the authors of a result. For each author in the result
* represented in the internal model one author in the esternal model is produced. - type of type String to represent
* the category of the result. Possible values are publication, dataset, software, other. It corresponds to
* resulttype.classname of the dumped result - language of type eu.dnetlib.dhp.schema.dump.oaf.Qualifier to store
* information about the language of the result. It is dumped as - code corresponds to language.classid - value
* corresponds to language.classname - country of type List<eu.dnetlib.dhp.schema.dump.oaf.Country> to store the country
* list to which the result is associated. For each country in the result respresented in the internal model one country
* in the external model is produces - subjects of type List<eu.dnetlib.dhp.dump.oaf.Subject> to store the subjects for
* the result. For each subject in the result represented in the internal model one subject in the external model is
* produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first
* title in the resul to be dumped having classid equals to "main title" - subtitle of type String to store the subtitle
* of the result. It corresponds to the value of the first title in the resul to be dumped having classid equals to
* "subtitle" - description of type List<String> to store the description of the result. It corresponds to the list of
* description.value in the result represented in the internal model - publicationdate of type String to store the
* pubblication date. It corresponds to dateofacceptance.value in the result represented in the internal model -
* publisher of type String to store information about the publisher. It corresponds to publisher.value of the result
* represented in the intrenal model - embargoenddate of type String to store the embargo end date. It corresponds to
* embargoenddate.value of the result represented in the internal model - source of type List<String> See definition of
* Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal
* model - format of type List<String> It corresponds to the list of format.value in the result represented in the
* internal model - contributor of type List<String> to represent contributors for this result. It corresponds to the
* list of contributor.value in the result represented in the internal model - coverage of type String. It corresponds
* to the list of coverage.value in the result represented in the internal model - bestaccessright of type
* eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store informatin about the openest access right associated to the
* manifestations of this research results. It corresponds to the same parameter in the result represented in the
* internal model - instance of type List<eu.dnetlib.dhp.schema.dump.oaf.Instance> to store all the instances associated
* to the result. It corresponds to the same parameter in the result represented in the internal model - container of
* type eu.dnetlib.dhp.schema/dump.oaf.Container (only for result of type publication). It corresponds to the parameter
* journal of the result represented in the internal model - documentationUrl of type List<String> (only for results of
* type software) to store the URLs to the software documentation. It corresponds to the list of documentationUrl.value
* of the result represented in the internal model - codeRepositoryUrl of type String (only for results of type
* software) to store the URL to the repository with the source code. It corresponds to codeRepositoryUrl.value of the
* result represented in the internal model - programmingLanguage of type String (only for results of type software) to
* store the programming language. It corresponds to programmingLanguaga.classid of the result represented in the
* internal model - contactperson of type List<String> (only for results of type other) to store the contact person for
* this result. It corresponds to the list of contactperson.value of the result represented in the internal model -
* contactgroup of type List<String> (only for results of type other) to store the information for the contact group. It
* corresponds to the list of contactgroup.value of the result represented in the internal model - tool of type
* List<String> (only fro results of type other) to store information about tool useful for the interpretation and/or
* re-used of the research product. It corresponds to the list of tool.value in the result represented in the internal
* modelt - size of type String (only for results of type dataset) to store the size of the dataset. It corresponds to
* size.value in the result represented in the internal model - version of type String (only for results of type
* dataset) to store the version. It corresponds to version.value of the result represented in the internal model -
* geolocation fo type List<eu.dnetlib.dhp.schema.dump.oaf.GeoLocation> (only for results of type dataset) to store
* geolocation information. For each geolocation element in the result represented in the internal model a GeoLocation
* in the external model il produced - id of type String to store the OpenAIRE id of the result. It corresponds to the
* id of the result represented in the internal model - originalId of type List<String> to store the original ids of the
* result. It corresponds to the originalId of the result represented in the internal model - pid of type
* List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the persistent identifiers for the result. For each pid
* in the results represented in the internal model one pid in the external model is produced. The value correspondence
* is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model - value corresponds
* to the pid.value of the result represented in the internal model - dateofcollection of type String to store
* information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result
* represented in the internal model - lasteupdatetimestamp of type String to store the timestamp of the last update of
* the record. It corresponds to lastupdatetimestamp of the resord represented in the internal model
*/
public class Result implements Serializable {
private List<Author> author;
// resulttype allows subclassing results into publications | datasets | software
private String type; // resulttype
// common fields
private Qualifier language;
private List<Country> country;
private List<Subject> subjects;
private String maintitle;
private String subtitle;
private List<String> description;
private String publicationdate; // dateofacceptance;
private String publisher;
private String embargoenddate;
private List<String> source;
private List<String> format;
private List<String> contributor;
private List<String> coverage;
private AccessRight bestaccessright;
private List<Instance> instance;
private Container container;// Journal
private List<String> documentationUrl; // software
private String codeRepositoryUrl; // software
private String programmingLanguage; // software
private List<String> contactperson; // orp
private List<String> contactgroup; // orp
private List<String> tool; // orp
private String size; // dataset
private String version; // dataset
private List<GeoLocation> geolocation; // dataset
private String id;
private List<String> originalId;
private List<ControlledField> pid;
private String dateofcollection;
private Long lastupdatetimestamp;
public Long getLastupdatetimestamp() {
return lastupdatetimestamp;
}
public void setLastupdatetimestamp(Long lastupdatetimestamp) {
this.lastupdatetimestamp = lastupdatetimestamp;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<String> getOriginalId() {
return originalId;
}
public void setOriginalId(List<String> originalId) {
this.originalId = originalId;
}
public List<ControlledField> getPid() {
return pid;
}
public void setPid(List<ControlledField> pid) {
this.pid = pid;
}
public String getDateofcollection() {
return dateofcollection;
}
public void setDateofcollection(String dateofcollection) {
this.dateofcollection = dateofcollection;
}
public List<Author> getAuthor() {
return author;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public Container getContainer() {
return container;
}
public void setContainer(Container container) {
this.container = container;
}
public void setAuthor(List<Author> author) {
this.author = author;
}
public Qualifier getLanguage() {
return language;
}
public void setLanguage(Qualifier language) {
this.language = language;
}
public List<Country> getCountry() {
return country;
}
public void setCountry(List<Country> country) {
this.country = country;
}
public List<Subject> getSubjects() {
return subjects;
}
public void setSubjects(List<Subject> subjects) {
this.subjects = subjects;
}
public String getMaintitle() {
return maintitle;
}
public void setMaintitle(String maintitle) {
this.maintitle = maintitle;
}
public String getSubtitle() {
return subtitle;
}
public void setSubtitle(String subtitle) {
this.subtitle = subtitle;
}
public List<String> getDescription() {
return description;
}
public void setDescription(List<String> description) {
this.description = description;
}
public String getPublicationdate() {
return publicationdate;
}
public void setPublicationdate(String publicationdate) {
this.publicationdate = publicationdate;
}
public String getPublisher() {
return publisher;
}
public void setPublisher(String publisher) {
this.publisher = publisher;
}
public String getEmbargoenddate() {
return embargoenddate;
}
public void setEmbargoenddate(String embargoenddate) {
this.embargoenddate = embargoenddate;
}
public List<String> getSource() {
return source;
}
public void setSource(List<String> source) {
this.source = source;
}
public List<String> getFormat() {
return format;
}
public void setFormat(List<String> format) {
this.format = format;
}
public List<String> getContributor() {
return contributor;
}
public void setContributor(List<String> contributor) {
this.contributor = contributor;
}
public List<String> getCoverage() {
return coverage;
}
public void setCoverage(List<String> coverage) {
this.coverage = coverage;
}
public AccessRight getBestaccessright() {
return bestaccessright;
}
public void setBestaccessright(AccessRight bestaccessright) {
this.bestaccessright = bestaccessright;
}
public List<Instance> getInstance() {
return instance;
}
public void setInstance(List<Instance> instance) {
this.instance = instance;
}
public List<String> getDocumentationUrl() {
return documentationUrl;
}
public void setDocumentationUrl(List<String> documentationUrl) {
this.documentationUrl = documentationUrl;
}
public String getCodeRepositoryUrl() {
return codeRepositoryUrl;
}
public void setCodeRepositoryUrl(String codeRepositoryUrl) {
this.codeRepositoryUrl = codeRepositoryUrl;
}
public String getProgrammingLanguage() {
return programmingLanguage;
}
public void setProgrammingLanguage(String programmingLanguage) {
this.programmingLanguage = programmingLanguage;
}
public List<String> getContactperson() {
return contactperson;
}
public void setContactperson(List<String> contactperson) {
this.contactperson = contactperson;
}
public List<String> getContactgroup() {
return contactgroup;
}
public void setContactgroup(List<String> contactgroup) {
this.contactgroup = contactgroup;
}
public List<String> getTool() {
return tool;
}
public void setTool(List<String> tool) {
this.tool = tool;
}
public String getSize() {
return size;
}
public void setSize(String size) {
this.size = size;
}
public String getVersion() {
return version;
}
public void setVersion(String version) {
this.version = version;
}
public List<GeoLocation> getGeolocation() {
return geolocation;
}
public void setGeolocation(List<GeoLocation> geolocation) {
this.geolocation = geolocation;
}
}

@ -0,0 +1,34 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
* To represent keywords associated to the result. It has two parameters: - subject of type
* eu.dnetlib.dhp.schema.dump.oaf.ControlledField to describe the subject. It mapped as: - schema it corresponds to
* qualifier.classid of the dumped subject - value it corresponds to the subject value - provenance of type
* eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo
* is not null. In this case: - provenance corresponds to dataInfo.provenanceaction.classname - trust corresponds to
* dataInfo.trust
*/
public class Subject implements Serializable {
private ControlledField subject;
private Provenance provenance;
public ControlledField getSubject() {
return subject;
}
public void setSubject(ControlledField subject) {
this.subject = subject;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
}

@ -0,0 +1,51 @@
package eu.dnetlib.dhp.schema.dump.oaf.community;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.dump.oaf.Result;
/**
* extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Project> to store the list of projects related to the result. The
* information is added after the result is mapped to the external model - context of type
* List<eu.dnetlib.dhp.schema/dump.oaf.community.Context> to store information about the RC RI related to the result.
* For each context in the result represented in the internal model one context in the external model is produced -
* collectedfrom of type List<eu.dnetliv.dhp.schema.dump.oaf.KeyValue> to store information about the sources from which
* the record has been collected. For each collectedfrom in the result represented in the internal model one
* collectedfrom in the external model is produced
*/
public class CommunityResult extends Result {
private List<Project> projects;
private List<Context> context;
protected List<KeyValue> collectedfrom;
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public List<Project> getProjects() {
return projects;
}
public void setProjects(List<Project> projects) {
this.projects = projects;
}
public List<Context> getContext() {
return context;
}
public void setContext(List<Context> context) {
this.context = context;
}
}

@ -0,0 +1,40 @@
package eu.dnetlib.dhp.schema.dump.oaf.community;
import java.util.List;
import java.util.Objects;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
/**
* Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with
* OpenAIRE. It extend eu.dnetlib.dhp.shema.dump.oaf.Qualifier with a parameter provenance of type
* List<eu.dnetlib.dhp.schema.dump.oaf.Provenance> to store the provenances of the association between the result and
* the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result
* to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::"
* will be used as value for code - label it corresponds to the label associated to the id. The information id taken
* from the profile of the RC/RI - provenance it is set only if the dataInfo associated to the contenxt element of the
* result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is
* instantiated if the element datainfo.provenanceaction is not null. In this case - provenance corresponds to
* dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust
*/
public class Context extends Qualifier {
private List<Provenance> provenance;
public List<Provenance> getProvenance() {
return provenance;
}
public void setProvenance(List<Provenance> provenance) {
this.provenance = provenance;
}
@Override
public int hashCode() {
String provenance = new String();
this.provenance.forEach(p -> provenance.concat(p.toString()));
return Objects.hash(getCode(), getLabel(), provenance);
}
}

@ -0,0 +1,52 @@
package eu.dnetlib.dhp.schema.dump.oaf.community;
import java.io.Serializable;
/**
* To store information about the funder funding the project related to the result. It has the following parameters: -
* shortName of type String to store the funder short name (e.c. AKA). - name of type String to store the funder name
* (e.c. Akademy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to
* store the jurisdiction of the funder
*/
public class Funder implements Serializable {
private String shortName;
private String name;
private String fundingStream;
private String jurisdiction;
public String getJurisdiction() {
return jurisdiction;
}
public void setJurisdiction(String jurisdiction) {
this.jurisdiction = jurisdiction;
}
public String getShortName() {
return shortName;
}
public void setShortName(String shortName) {
this.shortName = shortName;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getFundingStream() {
return fundingStream;
}
public void setFundingStream(String fundingStream) {
this.fundingStream = fundingStream;
}
}

@ -0,0 +1,88 @@
package eu.dnetlib.dhp.schema.dump.oaf.community;
import java.io.Serializable;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
/**
* To store information about the project related to the result. This information is not directly mapped from the result
* represented in the internal model because it is not there. The mapped result will be enriched with project
* information derived by relation between results and projects. Project class has the following parameters: - id of
* type String to store the OpenAIRE id for the Project - code of type String to store the grant agreement - acronym of
* type String to store the acronym for the project - title of type String to store the title of the project - funder of
* type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information about the funder funding the project -
* provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store information about the. provenance of the
* association between the result and the project
*/
public class Project implements Serializable {
private String id;// OpenAIRE id
private String code;
private String acronym;
private String title;
private Funder funder;
private Provenance provenance;
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public Funder getFunder() {
return funder;
}
public void setFunder(Funder funders) {
this.funder = funders;
}
public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
Project project = new Project();
project.setAcronym(acronym);
project.setCode(code);
project.setFunder(funder);
project.setId(id);
project.setTitle(title);
return project;
}
}

@ -0,0 +1,21 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
public class Constants implements Serializable {
// collectedFrom va con isProvidedBy -> becco da ModelSupport
public static final String HOSTED_BY = "isHostedBy";
public static final String HOSTS = "hosts";
// community result uso isrelatedto
public static final String RESULT_ENTITY = "result";
public static final String DATASOURCE_ENTITY = "datasource";
public static final String CONTEXT_ENTITY = "context";
public static final String CONTEXT_ID = "60";
public static final String CONTEXT_NS_PREFIX = "context____";
}

@ -0,0 +1,316 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.Container;
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
/**
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
* id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource
* represented in the internal model - originalId of type List<String> to store the list of original ids associated to
* the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The
* null values are filtered out - pid of type List<eu.dnetlib.shp.schema.dump.oaf.ControlledField> to store the
* persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid
* in the external model is produced as : - schema corresponds to pid.qualifier.classid of the datasource represented in
* the internal model - value corresponds to pid.value of the datasource represented in the internal model -
* datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g.
* pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It
* corresponds to datasourcetype of the datasource represented in the internal model and : - code corresponds to
* datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to
* store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to).
* It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname
* of type Sgtring to store the official name of the datasource. It correspond to officialname.value of the datasource
* represented in the internal model - englishname of type String to store the English name of the datasource. It
* corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to
* store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in
* the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to
* logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the data
* of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the
* datasource represented in the internal model - description of type String to store the description for the
* datasource. It corresponds to description.value of the datasource represented in the internal model
*/
public class Datasource implements Serializable {
private String id; // string
private List<String> originalId; // list string
private List<ControlledField> pid; // list<String>
private ControlledField datasourcetype; // value
private String openairecompatibility; // value
private String officialname; // string
private String englishname; // string
private String websiteurl; // string
private String logourl; // string
private String dateofvalidation; // string
private String description; // description
private List<String> subjects; // List<String>
// opendoar specific fields (od*)
private List<String> languages; // odlanguages List<String>
private List<String> contenttypes; // odcontent types List<String>
// re3data fields
private String releasestartdate; // string
private String releaseenddate; // string
private String missionstatementurl; // string
// {open, restricted or closed}
private String accessrights; // databaseaccesstype string
// {open, restricted or closed}
private String uploadrights; // datauploadtype string
// {feeRequired, registration, other}
private String databaseaccessrestriction; // string
// {feeRequired, registration, other}
private String datauploadrestriction; // string
private Boolean versioning; // boolean
private String citationguidelineurl; // string
// {yes, no, uknown}
private String pidsystems; // string
private String certificates; // string
private List<Object> policies; //
private Container journal; // issn etc del Journal
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<String> getOriginalId() {
return originalId;
}
public void setOriginalId(List<String> originalId) {
this.originalId = originalId;
}
public List<ControlledField> getPid() {
return pid;
}
public void setPid(List<ControlledField> pid) {
this.pid = pid;
}
public ControlledField getDatasourcetype() {
return datasourcetype;
}
public void setDatasourcetype(ControlledField datasourcetype) {
this.datasourcetype = datasourcetype;
}
public String getOpenairecompatibility() {
return openairecompatibility;
}
public void setOpenairecompatibility(String openairecompatibility) {
this.openairecompatibility = openairecompatibility;
}
public String getOfficialname() {
return officialname;
}
public void setOfficialname(String officialname) {
this.officialname = officialname;
}
public String getEnglishname() {
return englishname;
}
public void setEnglishname(String englishname) {
this.englishname = englishname;
}
public String getWebsiteurl() {
return websiteurl;
}
public void setWebsiteurl(String websiteurl) {
this.websiteurl = websiteurl;
}
public String getLogourl() {
return logourl;
}
public void setLogourl(String logourl) {
this.logourl = logourl;
}
public String getDateofvalidation() {
return dateofvalidation;
}
public void setDateofvalidation(String dateofvalidation) {
this.dateofvalidation = dateofvalidation;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
public List<String> getSubjects() {
return subjects;
}
public void setSubjects(List<String> subjects) {
this.subjects = subjects;
}
public List<String> getLanguages() {
return languages;
}
public void setLanguages(List<String> languages) {
this.languages = languages;
}
public List<String> getContenttypes() {
return contenttypes;
}
public void setContenttypes(List<String> contenttypes) {
this.contenttypes = contenttypes;
}
public String getReleasestartdate() {
return releasestartdate;
}
public void setReleasestartdate(String releasestartdate) {
this.releasestartdate = releasestartdate;
}
public String getReleaseenddate() {
return releaseenddate;
}
public void setReleaseenddate(String releaseenddate) {
this.releaseenddate = releaseenddate;
}
public String getMissionstatementurl() {
return missionstatementurl;
}
public void setMissionstatementurl(String missionstatementurl) {
this.missionstatementurl = missionstatementurl;
}
public String getAccessrights() {
return accessrights;
}
public void setAccessrights(String accessrights) {
this.accessrights = accessrights;
}
public String getUploadrights() {
return uploadrights;
}
public void setUploadrights(String uploadrights) {
this.uploadrights = uploadrights;
}
public String getDatabaseaccessrestriction() {
return databaseaccessrestriction;
}
public void setDatabaseaccessrestriction(String databaseaccessrestriction) {
this.databaseaccessrestriction = databaseaccessrestriction;
}
public String getDatauploadrestriction() {
return datauploadrestriction;
}
public void setDatauploadrestriction(String datauploadrestriction) {
this.datauploadrestriction = datauploadrestriction;
}
public Boolean getVersioning() {
return versioning;
}
public void setVersioning(Boolean versioning) {
this.versioning = versioning;
}
public String getCitationguidelineurl() {
return citationguidelineurl;
}
public void setCitationguidelineurl(String citationguidelineurl) {
this.citationguidelineurl = citationguidelineurl;
}
public String getPidsystems() {
return pidsystems;
}
public void setPidsystems(String pidsystems) {
this.pidsystems = pidsystems;
}
public String getCertificates() {
return certificates;
}
public void setCertificates(String certificates) {
this.certificates = certificates;
}
public List<Object> getPolicies() {
return policies;
}
public void setPolicies(List<Object> policiesr3) {
this.policies = policiesr3;
}
public Container getJournal() {
return journal;
}
public void setJournal(Container journal) {
this.journal = journal;
}
}

@ -0,0 +1,54 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To store information about the funder funding the project related to the result. It has the following parameters:
* - private String shortName to store the short name of the funder (e.g. AKA)
* - private String name to store information about the name of the funder (e.g. Akademy of Finland)
* - private Fundings funding_stream to store the fundingstream
* - private String jurisdiction to store information about the jurisdiction of the funder
*/
public class Funder implements Serializable {
private String shortName;
private String name;
private Fundings funding_stream;
private String jurisdiction;
public String getShortName() {
return shortName;
}
public void setShortName(String shortName) {
this.shortName = shortName;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getJurisdiction() {
return jurisdiction;
}
public void setJurisdiction(String jurisdiction) {
this.jurisdiction = jurisdiction;
}
public Fundings getFunding_stream() {
return funding_stream;
}
public void setFunding_stream(Fundings funding_stream) {
this.funding_stream = funding_stream;
}
}

@ -0,0 +1,35 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To store inforamtion about the funding stream. It has two parameters:
* - private String id to store the id of the fundings stream. The id is created by appending the shortname of the
* funder to the name of each level in the xml representing the fundng stream. For example: if the funder is the
* European Commission, the funding level 0 name is FP7, the funding level 1 name is SP3 and the funding level 2 name is
* PEOPLE then the id will be: EC::FP7::SP3::PEOPLE
* - private String description to describe the funding stream. It is created by concatenating the description of each funding
* level so for the example above the description would be: SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions
*/
public class Fundings implements Serializable {
private String id;
private String description;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
}

@ -0,0 +1,56 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.Optional;
/**
* To describe the funded amount. It has the following parameters:
* - private String currency to store the currency of the fund
* - private float totalcost to store the total cost of the project
* - private float fundedamount to store the funded amount by the funder
*/
public class Granted implements Serializable {
private String currency;
private float totalcost;
private float fundedamount;
public String getCurrency() {
return currency;
}
public void setCurrency(String currency) {
this.currency = currency;
}
public float getTotalcost() {
return totalcost;
}
public void setTotalcost(float totalcost) {
this.totalcost = totalcost;
}
public float getFundedamount() {
return fundedamount;
}
public void setFundedamount(float fundedamount) {
this.fundedamount = fundedamount;
}
public static Granted newInstance(String currency, float totalcost, float fundedamount) {
Granted granted = new Granted();
granted.currency = currency;
granted.totalcost = totalcost;
granted.fundedamount = fundedamount;
return granted;
}
public static Granted newInstance(String currency, float fundedamount) {
Granted granted = new Granted();
granted.currency = currency;
granted.fundedamount = fundedamount;
return granted;
}
}

@ -0,0 +1,41 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To represent the generic node in a relation. It has the following parameters:
* - private String id the openaire id of the entity in the relation
* - private String type the type of the entity in the relation.
*
* Consider the generic relation between a Result R and a Project P, the node representing R will have
* as id the id of R and as type result, while the node representing the project will have as id the id of the project
* and as type project
*/
public class Node implements Serializable {
private String id;
private String type;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public static Node newInstance(String id, String type) {
Node node = new Node();
node.id = id;
node.type = type;
return node;
}
}

@ -0,0 +1,88 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
import eu.dnetlib.dhp.schema.dump.oaf.Country;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
/**
* To represent the generic organizaiton. It has the following parameters:
* - private String legalshortname to store the legalshortname of the organizaiton
* - private String legalname to store the legal name of the organization
* - private String websiteurl to store the websiteurl of the organization
* - private List<String> alternativenames to store the alternative names of the organization
* - private Qualifier country to store the country of the organization
* - private String id to store the id of the organization
* - private List<ControlledField> pid to store the list of pids for the organization
*/
public class Organization implements Serializable {
private String legalshortname;
private String legalname;
private String websiteurl;
private List<String> alternativenames;
private Qualifier country;
private String id;
private List<ControlledField> pid;
public String getLegalshortname() {
return legalshortname;
}
public void setLegalshortname(String legalshortname) {
this.legalshortname = legalshortname;
}
public String getLegalname() {
return legalname;
}
public void setLegalname(String legalname) {
this.legalname = legalname;
}
public String getWebsiteurl() {
return websiteurl;
}
public void setWebsiteurl(String websiteurl) {
this.websiteurl = websiteurl;
}
public List<String> getAlternativenames() {
return alternativenames;
}
public void setAlternativenames(List<String> alternativenames) {
this.alternativenames = alternativenames;
}
public Qualifier getCountry() {
return country;
}
public void setCountry(Qualifier country) {
this.country = country;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<ControlledField> getPid() {
return pid;
}
public void setPid(List<ControlledField> pid) {
this.pid = pid;
}
}

@ -0,0 +1,37 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To store information about the ec programme for the project. It has the following parameters:
* - private String code to store the code of the programme
* - private String description to store the description of the programme
*/
public class Programme implements Serializable {
private String code;
private String description;
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
public static Programme newInstance(String code, String description) {
Programme p = new Programme();
p.code = code;
p.description = description;
return p;
}
}

@ -0,0 +1,195 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
/**
* This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump
* of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and
* Projects but we put the information about the Funder within the Project representation. We also removed the
* collected from element from the Project. No relation between the Project and the Datasource entity from which it is
* collected will be created. We will never create relations between Project and Datasource. In case some relation will
* be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project,
* project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to
* 0. It has the following parameters:
* - private String id to store the id of the project (OpenAIRE id)
* - private String websiteurl to store the websiteurl of the project
* - private String code to store the grant agreement of the project
* - private String acronym to store the acronym of the project
* - private String title to store the tile of the project
* - private String startdate to store the start date
* - private String enddate to store the end date
* - private String callidentifier to store the call indentifier
* - private String keywords to store the keywords
* - private boolean openaccessmandateforpublications to store if the project must accomplish to the open access mandate
* for publications. This value will be set to true if one of the field in the project represented in the internal model
* is set to true
* - private boolean openaccessmandatefordataset to store if the project must accomplish to the open access mandate for
* dataset. It is set to the value in the corresponding filed of the project represented in the internal model
* - private List<String> subject to store the list of subjects of the project
* - private List<Funder> funding to store the list of funder of the project
* - private String summary to store the summary of the project
* - private Granted granted to store the granted amount
* - private List<Programme> programme to store the list of programmes the project is related to
*/
public class Project implements Serializable {
private String id;
private String websiteurl;
private String code;
private String acronym;
private String title;
private String startdate;
private String enddate;
private String callidentifier;
private String keywords;
private boolean openaccessmandateforpublications;
private boolean openaccessmandatefordataset;
private List<String> subject;
private List<Funder> funding;
private String summary;
private Granted granted;
private List<Programme> programme;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getWebsiteurl() {
return websiteurl;
}
public void setWebsiteurl(String websiteurl) {
this.websiteurl = websiteurl;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getStartdate() {
return startdate;
}
public void setStartdate(String startdate) {
this.startdate = startdate;
}
public String getEnddate() {
return enddate;
}
public void setEnddate(String enddate) {
this.enddate = enddate;
}
public String getCallidentifier() {
return callidentifier;
}
public void setCallidentifier(String callidentifier) {
this.callidentifier = callidentifier;
}
public String getKeywords() {
return keywords;
}
public void setKeywords(String keywords) {
this.keywords = keywords;
}
public boolean isOpenaccessmandateforpublications() {
return openaccessmandateforpublications;
}
public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) {
this.openaccessmandateforpublications = openaccessmandateforpublications;
}
public boolean isOpenaccessmandatefordataset() {
return openaccessmandatefordataset;
}
public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) {
this.openaccessmandatefordataset = openaccessmandatefordataset;
}
public List<String> getSubject() {
return subject;
}
public void setSubject(List<String> subject) {
this.subject = subject;
}
public List<Funder> getFunding() {
return funding;
}
public void setFunding(List<Funder> funding) {
this.funding = funding;
}
public String getSummary() {
return summary;
}
public void setSummary(String summary) {
this.summary = summary;
}
public Granted getGranted() {
return granted;
}
public void setGranted(Granted granted) {
this.granted = granted;
}
public List<Programme> getProgramme() {
return programme;
}
public void setProgramme(List<Programme> programme) {
this.programme = programme;
}
}

@ -0,0 +1,40 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To represent the semantics of the generic relation between two entities. It has the following parameters:
* - private String name to store the semantics of the relation (i.e. isAuthorInstitutionOf). It corresponds to the
* relclass parameter in the relation represented in the internal model
* represented in the internal model
* - private String type to store the type of the relation (i.e. affiliation). It corresponds to the subreltype parameter
* of the relation represented in theinternal model
*/
public class RelType implements Serializable {
private String name; // relclass
private String type; // subreltype
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public static RelType newInstance(String name, String type) {
RelType rel = new RelType();
rel.name = name;
rel.type = type;
return rel;
}
}

@ -0,0 +1,68 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.Objects;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
/**
* To represent the gereric relation between two entities. It has the following parameters:
* - private Node source to represent the entity source of the relation
* - private Node target to represent the entity target of the relation
* - private RelType reltype to represent the semantics of the relation
* - private Provenance provenance to represent the provenance of the relation
*/
public class Relation implements Serializable {
private Node source;
private Node target;
private RelType reltype;
private Provenance provenance;
public Node getSource() {
return source;
}
public void setSource(Node source) {
this.source = source;
}
public Node getTarget() {
return target;
}
public void setTarget(Node target) {
this.target = target;
}
public RelType getReltype() {
return reltype;
}
public void setReltype(RelType reltype) {
this.reltype = reltype;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
@Override
public int hashCode() {
return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName());
}
public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) {
Relation relation = new Relation();
relation.source = source;
relation.target = target;
relation.reltype = reltype;
relation.provenance = provenance;
return relation;
}
}

@ -0,0 +1,20 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.util.List;
/**
* To represent RC entities. It extends eu.dnetlib.dhp.dump.oaf.grap.ResearchInitiative by adding the parameter subject
* to store the list of subjects related to the community
*/
public class ResearchCommunity extends ResearchInitiative {
private List<String> subject;
public List<String> getSubject() {
return subject;
}
public void setSubject(List<String> subject) {
this.subject = subject;
}
}

@ -0,0 +1,71 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
* To represent entity of type RC/RI. It has the following parameters, which are mostly derived by the profile
* - private String id to store the openaire id for the entity. Is has as code 00 and will be created as
* 00|context_____::md5(originalId)
* private String originalId to store the id of the context as provided in the profile (i.e. mes)
* private String name to store the name of the context (got from the label attribute in the context definition)
* private String type to store the type of the context (i.e.: research initiative or research community)
* private String description to store the description of the context as given in the profile
* private String zenodo_community to store the zenodo community associated to the context (main zenodo community)
*/
public class ResearchInitiative implements Serializable {
private String id; // openaireId
private String originalId; // context id
private String name; // context name
private String type; // context type: research initiative or research community
private String description;
private String zenodo_community;
public String getZenodo_community() {
return zenodo_community;
}
public void setZenodo_community(String zenodo_community) {
this.zenodo_community = zenodo_community;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String label) {
this.name = label;
}
public String getOriginalId() {
return originalId;
}
public void setOriginalId(String originalId) {
this.originalId = originalId;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
}

@ -1,30 +0,0 @@
package eu.dnetlib.dhp.schema.scholexplorer;
import java.util.List;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class DLIRelation extends Relation {
private String dateOfCollection;
private List<KeyValue> collectedFrom;
public List<KeyValue> getCollectedFrom() {
return collectedFrom;
}
public void setCollectedFrom(List<KeyValue> collectedFrom) {
this.collectedFrom = collectedFrom;
}
public String getDateOfCollection() {
return dateOfCollection;
}
public void setDateOfCollection(String dateOfCollection) {
this.dateOfCollection = dateOfCollection;
}
}

@ -2,10 +2,8 @@
package eu.dnetlib.dhp.schema.scholexplorer;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
@ -78,6 +76,25 @@ public class DLIUnknown extends Oaf implements Serializable {
if ("complete".equalsIgnoreCase(p.completionStatus))
completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
if (StringUtils.isEmpty(id) && StringUtils.isNoneEmpty(p.getId()))
id = p.getId();
if (StringUtils.isEmpty(dateofcollection) && StringUtils.isNoneEmpty(p.getDateofcollection()))
dateofcollection = p.getDateofcollection();
if (StringUtils.isEmpty(dateoftransformation) && StringUtils.isNoneEmpty(p.getDateoftransformation()))
dateofcollection = p.getDateoftransformation();
pid = mergeLists(pid, p.getPid());
}
protected <T> List<T> mergeLists(final List<T>... lists) {
return Arrays
.stream(lists)
.filter(Objects::nonNull)
.flatMap(List::stream)
.filter(Objects::nonNull)
.distinct()
.collect(Collectors.toList());
}
private List<ProvenaceInfo> mergeProvenance(

@ -31,6 +31,10 @@
<artifactId>elasticsearch-hadoop</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>

@ -0,0 +1,31 @@
package eu.dnetlib.dhp.broker.model;
import java.io.Serializable;
public class ConditionParams implements Serializable {
/**
*
*/
private static final long serialVersionUID = 2719901844537516110L;
private String value;
private String otherValue;
public String getValue() {
return value;
}
public void setValue(final String value) {
this.value = value;
}
public String getOtherValue() {
return otherValue;
}
public void setOtherValue(final String otherValue) {
this.otherValue = otherValue;
}
}

@ -2,7 +2,6 @@
package eu.dnetlib.dhp.broker.model;
import java.text.ParseException;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
@ -19,16 +18,12 @@ public class EventFactory {
private final static String PRODUCER_ID = "OpenAIRE";
private static final int TTH_DAYS = 365;
private final static String[] DATE_PATTERNS = {
"yyyy-MM-dd"
};
public static Event newBrokerEvent(final UpdateInfo<?> updateInfo) {
final long now = new Date().getTime();
final Event res = new Event();
final MappedFields map = createMapFromResult(updateInfo);
@ -44,8 +39,8 @@ public class EventFactory {
res.setPayload(updateInfo.asBrokerPayload().toJSON());
res.setMap(map);
res.setTopic(updateInfo.getTopicPath());
res.setCreationDate(now);
res.setExpiryDate(calculateExpiryDate(now));
res.setCreationDate(0l);
res.setExpiryDate(Long.MAX_VALUE);
res.setInstantMessage(false);
return res;
@ -96,7 +91,9 @@ public class EventFactory {
return map;
}
private static String calculateEventId(final String topic, final String dsId, final String publicationId,
private static String calculateEventId(final String topic,
final String dsId,
final String publicationId,
final String value) {
return "event-"
+ DigestUtils.md5Hex(topic).substring(0, 4) + "-"
@ -105,10 +102,6 @@ public class EventFactory {
+ DigestUtils.md5Hex(value).substring(0, 5);
}
private static long calculateExpiryDate(final long now) {
return now + TTH_DAYS * 24 * 60 * 60 * 1000;
}
private static long parseDateTolong(final String date) {
if (StringUtils.isBlank(date)) {
return -1;

@ -0,0 +1,37 @@
package eu.dnetlib.dhp.broker.model;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@JsonIgnoreProperties(ignoreUnknown = true)
public class MapCondition implements Serializable {
/**
*
*/
private static final long serialVersionUID = -7137490975452466813L;
private String field;
private List<ConditionParams> listParams = new ArrayList<>();
public String getField() {
return field;
}
public void setField(final String field) {
this.field = field;
}
public List<ConditionParams> getListParams() {
return listParams;
}
public void setListParams(final List<ConditionParams> listParams) {
this.listParams = listParams;
}
}

@ -0,0 +1,93 @@
package eu.dnetlib.dhp.broker.model;
import java.io.Serializable;
public class Notification implements Serializable {
/**
*
*/
private static final long serialVersionUID = -1770420972526995727L;
private String notificationId;
private String subscriptionId;
private String producerId;
private String eventId;
private String topic;
private Long date;
private String payload;
private MappedFields map;
public String getNotificationId() {
return notificationId;
}
public void setNotificationId(final String notificationId) {
this.notificationId = notificationId;
}
public String getSubscriptionId() {
return subscriptionId;
}
public void setSubscriptionId(final String subscriptionId) {
this.subscriptionId = subscriptionId;
}
public String getProducerId() {
return producerId;
}
public void setProducerId(final String producerId) {
this.producerId = producerId;
}
public String getEventId() {
return eventId;
}
public void setEventId(final String eventId) {
this.eventId = eventId;
}
public String getTopic() {
return topic;
}
public void setTopic(final String topic) {
this.topic = topic;
}
public String getPayload() {
return payload;
}
public void setPayload(final String payload) {
this.payload = payload;
}
public MappedFields getMap() {
return map;
}
public void setMap(final MappedFields map) {
this.map = map;
}
public Long getDate() {
return date;
}
public void setDate(final Long date) {
this.date = date;
}
}

@ -0,0 +1,74 @@
package eu.dnetlib.dhp.broker.model;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.ObjectMapper;
@JsonIgnoreProperties(ignoreUnknown = true)
public class Subscription implements Serializable {
/**
*
*/
private static final long serialVersionUID = 1051702214740830010L;
private String subscriptionId;
private String subscriber;
private String topic;
private String conditions;
public String getSubscriptionId() {
return subscriptionId;
}
public void setSubscriptionId(final String subscriptionId) {
this.subscriptionId = subscriptionId;
}
public String getSubscriber() {
return subscriber;
}
public void setSubscriber(final String subscriber) {
this.subscriber = subscriber;
}
public String getTopic() {
return topic;
}
public void setTopic(final String topic) {
this.topic = topic;
}
public String getConditions() {
return conditions;
}
public void setConditions(final String conditions) {
this.conditions = conditions;
}
public Map<String, List<ConditionParams>> conditionsAsMap() {
final ObjectMapper mapper = new ObjectMapper();
try {
final List<MapCondition> list = mapper
.readValue(
getConditions(), mapper.getTypeFactory().constructCollectionType(List.class, MapCondition.class));
return list
.stream()
.filter(mc -> !mc.getListParams().isEmpty())
.collect(Collectors.toMap(MapCondition::getField, MapCondition::getListParams));
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
}

@ -28,7 +28,7 @@ public class GenerateStatsJob {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
IndexOnESJob.class
GenerateStatsJob.class
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
parser.parseArgument(args);

@ -0,0 +1,126 @@
package eu.dnetlib.dhp.broker.oa;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.TypedColumn;
import org.apache.spark.util.LongAccumulator;
import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
import eu.dnetlib.dhp.broker.oa.util.EventGroup;
import eu.dnetlib.dhp.broker.oa.util.aggregators.subset.EventSubsetAggregator;
public class IndexEventSubsetJob {
private static final Logger log = LoggerFactory.getLogger(IndexEventSubsetJob.class);
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
IndexEventSubsetJob.class
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_event_subset.json")));
parser.parseArgument(args);
final SparkConf conf = new SparkConf();
final String eventsPath = parser.get("workingPath") + "/events";
log.info("eventsPath: {}", eventsPath);
final String index = parser.get("index");
log.info("index: {}", index);
final String indexHost = parser.get("esHost");
log.info("indexHost: {}", indexHost);
final int maxEventsForTopic = NumberUtils.toInt(parser.get("maxEventsForTopic"));
log.info("maxEventsForTopic: {}", maxEventsForTopic);
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
final TypedColumn<Event, EventGroup> aggr = new EventSubsetAggregator(maxEventsForTopic).toColumn();
final LongAccumulator total = spark.sparkContext().longAccumulator("total_indexed");
final long now = new Date().getTime();
final Dataset<Event> subset = ClusterUtils
.readPath(spark, eventsPath, Event.class)
.groupByKey(e -> e.getTopic() + '@' + e.getMap().getTargetDatasourceId(), Encoders.STRING())
.agg(aggr)
.map(t -> t._2, Encoders.bean(EventGroup.class))
.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
final JavaRDD<String> inputRdd = subset
.map(e -> prepareEventForIndexing(e, now, total), Encoders.STRING())
.javaRDD();
final Map<String, String> esCfg = new HashMap<>();
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
esCfg.put("es.index.auto.create", "false");
esCfg.put("es.nodes", indexHost);
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
esCfg.put("es.batch.write.retry.count", "8");
esCfg.put("es.batch.write.retry.wait", "60s");
esCfg.put("es.batch.size.entries", "200");
esCfg.put("es.nodes.wan.only", "true");
log.info("*** Start indexing");
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
log.info("*** End indexing");
log.info("*** Deleting old events");
final String message = deleteOldEvents(brokerApiBaseUrl, now - 1000);
log.info("*** Deleted events: " + message);
}
private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws Exception {
final String url = brokerApiBaseUrl + "/api/events/byCreationDate/0/" + l;
final HttpDelete req = new HttpDelete(url);
try (final CloseableHttpClient client = HttpClients.createDefault()) {
try (final CloseableHttpResponse response = client.execute(req)) {
return IOUtils.toString(response.getEntity().getContent());
}
}
}
private static String prepareEventForIndexing(final Event e, final long creationDate, final LongAccumulator acc)
throws JsonProcessingException {
acc.add(1);
e.setCreationDate(creationDate);
e.setExpiryDate(Long.MAX_VALUE);
return new ObjectMapper().writeValueAsString(e);
}
}

@ -0,0 +1,233 @@
package eu.dnetlib.dhp.broker.oa;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.broker.model.ConditionParams;
import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.model.MappedFields;
import eu.dnetlib.dhp.broker.model.Notification;
import eu.dnetlib.dhp.broker.model.Subscription;
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
import eu.dnetlib.dhp.broker.oa.util.NotificationGroup;
import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils;
public class IndexNotificationsJob {
private static final Logger log = LoggerFactory.getLogger(IndexNotificationsJob.class);
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(IndexNotificationsJob.class
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json")));
parser.parseArgument(args);
final SparkConf conf = new SparkConf();
final String eventsPath = parser.get("workingPath") + "/events";
log.info("eventsPath: {}", eventsPath);
final String index = parser.get("index");
log.info("index: {}", index);
final String indexHost = parser.get("esHost");
log.info("indexHost: {}", indexHost);
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
final LongAccumulator total = spark.sparkContext().longAccumulator("total_indexed");
final long startTime = new Date().getTime();
final List<Subscription> subscriptions = listSubscriptions(brokerApiBaseUrl);
log.info("Number of subscriptions: " + subscriptions.size());
if (subscriptions.size() > 0) {
final Dataset<Notification> notifications = ClusterUtils
.readPath(spark, eventsPath, Event.class)
.map(e -> generateNotifications(e, subscriptions, startTime), Encoders.bean(NotificationGroup.class))
.flatMap(g -> g.getData().iterator(), Encoders.bean(Notification.class));
final JavaRDD<String> inputRdd = notifications
.map(n -> prepareForIndexing(n, total), Encoders.STRING())
.javaRDD();
final Map<String, String> esCfg = new HashMap<>();
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
esCfg.put("es.index.auto.create", "false");
esCfg.put("es.nodes", indexHost);
esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
esCfg.put("es.batch.write.retry.count", "8");
esCfg.put("es.batch.write.retry.wait", "60s");
esCfg.put("es.batch.size.entries", "200");
esCfg.put("es.nodes.wan.only", "true");
log.info("*** Start indexing");
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
log.info("*** End indexing");
log.info("*** Deleting old notifications");
final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000);
log.info("*** Deleted notifications: " + message);
log.info("*** sendNotifications (emails, ...)");
sendNotifications(brokerApiBaseUrl, startTime - 1000);
log.info("*** ALL done.");
}
}
private static NotificationGroup generateNotifications(final Event e,
final List<Subscription> subscriptions,
final long date) {
final List<Notification> list = subscriptions
.stream()
.filter(s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic()))
.filter(s -> verifyConditions(e.getMap(), s.conditionsAsMap()))
.map(s -> generateNotification(s, e, date))
.collect(Collectors.toList());
return new NotificationGroup(list);
}
private static Notification generateNotification(final Subscription s, final Event e, final long date) {
final Notification n = new Notification();
n.setNotificationId("ntf-" + DigestUtils.md5Hex(s.getSubscriptionId() + "@@@" + e.getEventId()));
n.setSubscriptionId(s.getSubscriptionId());
n.setEventId(e.getEventId());
n.setProducerId(e.getProducerId());
n.setTopic(e.getTopic());
n.setPayload(e.getPayload());
n.setMap(e.getMap());
n.setDate(date);
return n;
}
private static boolean verifyConditions(final MappedFields map,
final Map<String, List<ConditionParams>> conditions) {
if (conditions.containsKey("targetDatasourceName")
&& !SubscriptionUtils
.verifyExact(map.getTargetDatasourceName(), conditions.get("targetDatasourceName").get(0).getValue())) {
return false;
}
if (conditions.containsKey("trust")
&& !SubscriptionUtils
.verifyFloatRange(map.getTrust(), conditions.get("trust").get(0).getValue(), conditions.get("trust").get(0).getOtherValue())) {
return false;
}
if (conditions.containsKey("targetDateofacceptance") && !conditions
.get("targetDateofacceptance")
.stream()
.anyMatch(c -> SubscriptionUtils
.verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) {
return false;
}
if (conditions.containsKey("targetResultTitle")
&& !conditions
.get("targetResultTitle")
.stream()
.anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) {
return false;
}
if (conditions.containsKey("targetAuthors")
&& !conditions
.get("targetAuthors")
.stream()
.allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) {
return false;
}
if (conditions.containsKey("targetSubjects")
&& !conditions
.get("targetSubjects")
.stream()
.allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue()))) {
return false;
}
return true;
}
private static List<Subscription> listSubscriptions(final String brokerApiBaseUrl) throws Exception {
final String url = brokerApiBaseUrl + "/api/subscriptions";
final HttpGet req = new HttpGet(url);
final ObjectMapper mapper = new ObjectMapper();
try (final CloseableHttpClient client = HttpClients.createDefault()) {
try (final CloseableHttpResponse response = client.execute(req)) {
final String s = IOUtils.toString(response.getEntity().getContent());
return mapper
.readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, Subscription.class));
}
}
}
private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception {
final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l;
final HttpDelete req = new HttpDelete(url);
try (final CloseableHttpClient client = HttpClients.createDefault()) {
try (final CloseableHttpResponse response = client.execute(req)) {
return IOUtils.toString(response.getEntity().getContent());
}
}
}
private static String sendNotifications(final String brokerApiBaseUrl, final long l) throws IOException {
final String url = brokerApiBaseUrl + "/api/openaireBroker/notifications/send/" + l;
final HttpGet req = new HttpGet(url);
try (final CloseableHttpClient client = HttpClients.createDefault()) {
try (final CloseableHttpResponse response = client.execute(req)) {
return IOUtils.toString(response.getEntity().getContent());
}
}
}
private static String prepareForIndexing(final Notification n, final LongAccumulator acc)
throws JsonProcessingException {
acc.add(1);
return new ObjectMapper().writeValueAsString(n);
}
}

@ -20,6 +20,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
@Deprecated
public class IndexOnESJob {
private static final Logger log = LoggerFactory.getLogger(IndexOnESJob.class);
@ -48,12 +49,13 @@ public class IndexOnESJob {
final JavaRDD<String> inputRdd = ClusterUtils
.readPath(spark, eventsPath, Event.class)
// .limit(10000) // TODO REMOVE
.map(IndexOnESJob::eventAsJsonString, Encoders.STRING())
.javaRDD();
final Map<String, String> esCfg = new HashMap<>();
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
esCfg.put("es.index.auto.create", "false");
esCfg.put("es.nodes", indexHost);
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
esCfg.put("es.batch.write.retry.count", "8");

@ -0,0 +1,44 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import eu.dnetlib.dhp.broker.model.Notification;
public class NotificationGroup implements Serializable {
/**
*
*/
private static final long serialVersionUID = 720996471281158977L;
private List<Notification> data = new ArrayList<>();
public NotificationGroup() {
}
public NotificationGroup(final List<Notification> data) {
this.data = data;
}
public List<Notification> getData() {
return data;
}
public void setData(final List<Notification> data) {
this.data = data;
}
public NotificationGroup addElement(final Notification elem) {
data.add(elem);
return this;
}
public NotificationGroup addGroup(final NotificationGroup group) {
data.addAll(group.getData());
return this;
}
}

@ -0,0 +1,49 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.text.ParseException;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.commons.lang3.time.DateUtils;
public class SubscriptionUtils {
private static final long ONE_DAY = 86_400_000;
public static boolean verifyListSimilar(final List<String> list, final String value) {
return list.stream().anyMatch(s -> verifySimilar(s, value));
}
public static boolean verifyListExact(final List<String> list, final String value) {
return list.stream().anyMatch(s -> verifyExact(s, value));
}
public static boolean verifySimilar(final String s1, final String s2) {
for (final String part : s2.split("\\W+")) {
if (!StringUtils.containsIgnoreCase(s1, part)) {
return false;
}
}
return true;
}
public static boolean verifyFloatRange(final float trust, final String min, final String max) {
return trust >= NumberUtils.toFloat(min, 0) && trust <= NumberUtils.toFloat(max, 1);
}
public static boolean verifyDateRange(final long date, final String min, final String max) {
try {
return date >= DateUtils.parseDate(min, "yyyy-MM-dd").getTime()
&& date < DateUtils.parseDate(max, "yyyy-MM-dd").getTime() + ONE_DAY;
} catch (final ParseException e) {
return false;
}
}
public static boolean verifyExact(final String s1, final String s2) {
return StringUtils.equalsIgnoreCase(s1, s2);
}
}

@ -0,0 +1,67 @@
package eu.dnetlib.dhp.broker.oa.util.aggregators.subset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.expressions.Aggregator;
import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.oa.util.EventGroup;
public class EventSubsetAggregator extends Aggregator<Event, EventGroup, EventGroup> {
/**
*
*/
private static final long serialVersionUID = -678071078823059805L;
private final int maxEventsForTopic;
public EventSubsetAggregator(final int maxEventsForTopic) {
this.maxEventsForTopic = maxEventsForTopic;
}
@Override
public EventGroup zero() {
return new EventGroup();
}
@Override
public EventGroup reduce(final EventGroup g, final Event e) {
if (g.getData().size() < maxEventsForTopic) {
g.getData().add(e);
}
return g;
}
@Override
public EventGroup merge(final EventGroup g0, final EventGroup g1) {
final int missing = maxEventsForTopic - g0.getData().size();
if (missing > 0) {
if (g1.getData().size() < missing) {
g0.getData().addAll(g1.getData());
} else {
g0.getData().addAll(g1.getData().subList(0, missing));
}
}
return g0;
}
@Override
public EventGroup finish(final EventGroup g) {
return g;
}
@Override
public Encoder<EventGroup> outputEncoder() {
return Encoders.bean(EventGroup.class);
}
@Override
public Encoder<EventGroup> bufferEncoder() {
return Encoders.bean(EventGroup.class);
}
}

@ -25,13 +25,25 @@
<description>a black list (comma separeted, - for empty list) of datasource ids</description>
</property>
<property>
<name>esIndexName</name>
<description>the elasticsearch index name</description>
<name>esEventIndexName</name>
<description>the elasticsearch index name for events</description>
</property>
<property>
<name>esNotificationsIndexName</name>
<description>the elasticsearch index name for notifications</description>
</property>
<property>
<name>esIndexHost</name>
<description>the elasticsearch host</description>
</property>
<property>
<name>maxIndexedEventsForDsAndTopic</name>
<description>the max number of events for each couple (ds/topic)</description>
</property>
<property>
<name>brokerApiBaseUrl</name>
<description>the url of the broker service api</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
@ -423,16 +435,43 @@
<arg>--datasourceTypeWhitelist</arg><arg>${datasourceTypeWhitelist}</arg>
<arg>--datasourceIdBlacklist</arg><arg>${datasourceIdBlacklist}</arg>
</spark>
<ok to="index_es"/>
<ok to="index_event_subset"/>
<error to="Kill"/>
</action>
<action name="index_event_subset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>IndexEventSubsetOnESJob</name>
<class>eu.dnetlib.dhp.broker.oa.IndexEventSubsetJob</class>
<jar>dhp-broker-events-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.dynamicAllocation.maxExecutors="8"
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--index</arg><arg>${esEventIndexName}</arg>
<arg>--esHost</arg><arg>${esIndexHost}</arg>
<arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark>
<ok to="index_notifications"/>
<error to="Kill"/>
</action>
<action name="index_es">
<action name="index_notifications">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>IndexOnESJob</name>
<class>eu.dnetlib.dhp.broker.oa.IndexOnESJob</class>
<name>IndexNotificationsOnESJob</name>
<class>eu.dnetlib.dhp.broker.oa.IndexNotificationsJob</class>
<jar>dhp-broker-events-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
@ -445,8 +484,9 @@
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--index</arg><arg>${esIndexName}</arg>
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
<arg>--esHost</arg><arg>${esIndexHost}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark>
<ok to="stats"/>
<error to="Kill"/>

@ -0,0 +1,32 @@
[
{
"paramName": "o",
"paramLongName": "workingPath",
"paramDescription": "the workinh path",
"paramRequired": true
},
{
"paramName": "idx",
"paramLongName": "index",
"paramDescription": "the ES index",
"paramRequired": true
},
{
"paramName": "es",
"paramLongName": "esHost",
"paramDescription": "the ES host",
"paramRequired": true
},
{
"paramName": "n",
"paramLongName": "maxEventsForTopic",
"paramDescription": "the max number of events for each couple (ds/topic)",
"paramRequired": true
},
{
"paramName": "broker",
"paramLongName": "brokerApiBaseUrl",
"paramDescription": "the url of the broker service api",
"paramRequired": true
}
]

@ -0,0 +1,26 @@
[
{
"paramName": "o",
"paramLongName": "workingPath",
"paramDescription": "the workinh path",
"paramRequired": true
},
{
"paramName": "idx",
"paramLongName": "index",
"paramDescription": "the ES index",
"paramRequired": true
},
{
"paramName": "es",
"paramLongName": "esHost",
"paramDescription": "the ES host",
"paramRequired": true
},
{
"paramName": "broker",
"paramLongName": "brokerApiBaseUrl",
"paramDescription": "the url of the broker service api",
"paramRequired": true
}
]

@ -8,6 +8,41 @@
<property>
<name>workingPath</name>
<description>the path where the the generated data will be stored</description>
</property>
<property>
<name>datasourceIdWhitelist</name>
<value>-</value>
<description>a white list (comma separeted, - for empty list) of datasource ids</description>
</property>
<property>
<name>datasourceTypeWhitelist</name>
<value>-</value>
<description>a white list (comma separeted, - for empty list) of datasource types</description>
</property>
<property>
<name>datasourceIdBlacklist</name>
<value>-</value>
<description>a black list (comma separeted, - for empty list) of datasource ids</description>
</property>
<property>
<name>esEventIndexName</name>
<description>the elasticsearch index name for events</description>
</property>
<property>
<name>esNotificationsIndexName</name>
<description>the elasticsearch index name for notifications</description>
</property>
<property>
<name>esIndexHost</name>
<description>the elasticsearch host</description>
</property>
<property>
<name>maxIndexedEventsForDsAndTopic</name>
<description>the max number of events for each couple (ds/topic)</description>
</property>
<property>
<name>brokerApiBaseUrl</name>
<description>the url of the broker service api</description>
</property>
<property>
<name>sparkDriverMemory</name>
@ -64,44 +99,18 @@
</configuration>
</global>
<start to="generate_events"/>
<start to="index_notifications"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="generate_events">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateEventsJob</name>
<class>eu.dnetlib.dhp.broker.oa.GenerateEventsJob</class>
<jar>dhp-broker-events-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--datasourceIdWhitelist</arg><arg>${datasourceIdWhitelist}</arg>
<arg>--datasourceTypeWhitelist</arg><arg>${datasourceTypeWhitelist}</arg>
<arg>--datasourceIdBlacklist</arg><arg>${datasourceIdBlacklist}</arg>
</spark>
<ok to="index_es"/>
<error to="Kill"/>
</action>
<action name="index_es">
<action name="index_notifications">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>IndexOnESJob</name>
<class>eu.dnetlib.dhp.broker.oa.IndexOnESJob</class>
<name>IndexNotificationsOnESJob</name>
<class>eu.dnetlib.dhp.broker.oa.IndexNotificationsJob</class>
<jar>dhp-broker-events-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
@ -114,37 +123,15 @@
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--index</arg><arg>${esIndexName}</arg>
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
<arg>--esHost</arg><arg>${esIndexHost}</arg>
</spark>
<ok to="stats"/>
<error to="Kill"/>
</action>
<action name="stats">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateStatsJob</name>
<class>eu.dnetlib.dhp.broker.oa.GenerateStatsJob</class>
<jar>dhp-broker-events-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

@ -0,0 +1,52 @@
package eu.dnetlib.dhp.broker.oa.util;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Arrays;
import org.junit.jupiter.api.Test;
class SubscriptionUtilsTest {
@Test
void testVerifyListSimilar() {
assertTrue(SubscriptionUtils.verifyListSimilar(Arrays.asList("Michele Artini", "Claudio Atzori"), "artini"));
assertFalse(SubscriptionUtils.verifyListSimilar(Arrays.asList("Michele Artini", "Claudio Atzori"), "bardi"));
}
@Test
void testVerifyListExact() {
assertTrue(SubscriptionUtils.verifyListExact(Arrays.asList("Java", "Perl"), "perl"));
assertFalse(SubscriptionUtils.verifyListExact(Arrays.asList("Java", "Perl"), "C"));
}
@Test
void testVerifySimilar() {
assertTrue(SubscriptionUtils.verifySimilar("Java Programming", "java"));
assertFalse(SubscriptionUtils.verifySimilar("Java Programming", "soap"));
}
@Test
void testVerifyFloatRange() {
assertTrue(SubscriptionUtils.verifyFloatRange(0.5f, "0.4", "0.6"));
assertFalse(SubscriptionUtils.verifyFloatRange(0.8f, "0.4", "0.6"));
assertTrue(SubscriptionUtils.verifyFloatRange(0.5f, "", ""));
}
@Test
void testVerifyDateRange() {
final long date = 1282738478000l; // 25 August 2010
assertTrue(SubscriptionUtils.verifyDateRange(date, "2010-01-01", "2011-01-01"));
assertFalse(SubscriptionUtils.verifyDateRange(date, "2020-01-01", "2021-01-01"));
}
@Test
void testVerifyExact() {
assertTrue(SubscriptionUtils.verifyExact("Java Programming", "java programming"));
assertFalse(SubscriptionUtils.verifyExact("Java Programming", "soap programming"));
}
}

@ -289,4 +289,12 @@ public class JsonPathTest {
System.out.println("d = " + d);
}
@Test
public void testNull() throws Exception {
final Object p = null;
System.out.println((String) p);
}
}

@ -6,6 +6,7 @@ import java.util.Collection;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
@ -15,6 +16,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.scholexplorer.DLIDataset;
import eu.dnetlib.dhp.schema.scholexplorer.DLIPublication;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.util.MapDocumentUtil;
import scala.Tuple2;
@ -30,10 +33,16 @@ public class DedupRecordFactory {
final DedupConfig dedupConf) {
long ts = System.currentTimeMillis();
// <id, json_entity>
final JavaPairRDD<String, String> inputJsonEntities = sc
.textFile(entitiesInputPath)
final JavaPairRDD<String, String> inputJsonEntities = spark
.read()
.load(entitiesInputPath)
.as(Encoders.kryo(Oaf.class))
.map(
(MapFunction<Oaf, String>) p -> new org.codehaus.jackson.map.ObjectMapper().writeValueAsString(p),
Encoders.STRING())
.javaRDD()
.mapToPair(
(PairFunction<String, String, String>) it -> new Tuple2<String, String>(
(PairFunction<String, String, String>) it -> new Tuple2<>(
MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), it), it));
// <source, target>: source is the dedup_id, target is the id of the mergedIn
@ -74,9 +83,9 @@ public class DedupRecordFactory {
}
}
private static Publication publicationMerger(Tuple2<String, Iterable<String>> e, final long ts) {
private static DLIPublication publicationMerger(Tuple2<String, Iterable<String>> e, final long ts) {
Publication p = new Publication(); // the result of the merge, to be returned at the end
DLIPublication p = new DLIPublication(); // the result of the merge, to be returned at the end
p.setId(e._1());
@ -91,7 +100,7 @@ public class DedupRecordFactory {
.forEach(
pub -> {
try {
Publication publication = mapper.readValue(pub, Publication.class);
DLIPublication publication = mapper.readValue(pub, DLIPublication.class);
p.mergeFrom(publication);
p.setAuthor(DedupUtility.mergeAuthor(p.getAuthor(), publication.getAuthor()));
@ -110,9 +119,9 @@ public class DedupRecordFactory {
return p;
}
private static Dataset datasetMerger(Tuple2<String, Iterable<String>> e, final long ts) {
private static DLIDataset datasetMerger(Tuple2<String, Iterable<String>> e, final long ts) {
Dataset d = new Dataset(); // the result of the merge, to be returned at the end
DLIDataset d = new DLIDataset(); // the result of the merge, to be returned at the end
d.setId(e._1());

@ -9,18 +9,21 @@ import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.graphx.Edge;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.codehaus.jackson.map.ObjectMapper;
import com.google.common.hash.Hashing;
import eu.dnetlib.dedup.graph.ConnectedComponent;
import eu.dnetlib.dedup.graph.GraphProcessor;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.util.MapDocumentUtil;
@ -42,7 +45,6 @@ public class SparkCreateConnectedComponent {
.master(parser.get("master"))
.getOrCreate();
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final String inputPath = parser.get("sourcePath");
final String entity = parser.get("entity");
final String targetPath = parser.get("targetPath");
@ -50,8 +52,12 @@ public class SparkCreateConnectedComponent {
// DedupConfig.load(IOUtils.toString(SparkCreateConnectedComponent.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/conf/org.curr.conf2.json")));
final DedupConfig dedupConf = DedupConfig.load(parser.get("dedupConf"));
final JavaPairRDD<Object, String> vertexes = sc
.textFile(inputPath + "/" + entity)
final JavaPairRDD<Object, String> vertexes = spark
.read()
.load(inputPath + "/" + entity)
.as(Encoders.kryo(Oaf.class))
.map((MapFunction<Oaf, String>) p -> new ObjectMapper().writeValueAsString(p), Encoders.STRING())
.javaRDD()
.map(s -> MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), s))
.mapToPair(
(PairFunction<String, Object, String>) s -> new Tuple2<Object, String>(getHashcode(s), s));

@ -4,10 +4,10 @@ package eu.dnetlib.dedup;
import org.apache.commons.io.IOUtils;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.pace.config.DedupConfig;
@ -41,12 +41,19 @@ public class SparkCreateDedupRecord {
DedupUtility.createEntityPath(sourcePath, entity),
OafEntityType.valueOf(entity),
dedupConf);
dedupRecord
.map(
r -> {
ObjectMapper mapper = new ObjectMapper();
return mapper.writeValueAsString(r);
})
.saveAsTextFile(dedupPath + "/" + entity + "/dedup_records");
spark
.createDataset(dedupRecord.rdd(), Encoders.kryo(OafEntity.class))
.write()
.mode(SaveMode.Overwrite)
.save(dedupPath + "/" + entity + "/dedup_records");
//
//
// dedupRecord
// .map(
// r -> {
// ObjectMapper mapper = new ObjectMapper();
// return mapper.writeValueAsString(r);
// })
// .saveAsTextFile(dedupPath + "/" + entity + "/dedup_records");
}
}

@ -7,10 +7,13 @@ import org.apache.commons.io.IOUtils;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.codehaus.jackson.map.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
@ -46,8 +49,13 @@ public class SparkCreateSimRels {
// DedupConfig.load(IOUtils.toString(SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json")));
final DedupConfig dedupConf = DedupConfig.load(parser.get("dedupConf"));
JavaPairRDD<String, MapDocument> mapDocument = sc
.textFile(inputPath + "/" + entity)
JavaPairRDD<String, MapDocument> mapDocument = spark
.read()
.load(inputPath + "/" + entity)
.as(Encoders.kryo(Oaf.class))
.map((MapFunction<Oaf, String>) p -> new ObjectMapper().writeValueAsString(p), Encoders.STRING())
.javaRDD()
.repartition(1000)
.mapToPair(
s -> {
MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s);

@ -1,29 +1,19 @@
package eu.dnetlib.dedup.sx;
import java.io.IOException;
import org.apache.commons.io.IOUtils;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils;
import scala.Tuple2;
public class SparkPropagateRelationsJob {
enum FieldType {
SOURCE, TARGET
}
static final String SOURCEJSONPATH = "$.source";
static final String TARGETJSONPATH = "$.target";
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -39,7 +29,6 @@ public class SparkPropagateRelationsJob {
.master(parser.get("master"))
.getOrCreate();
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final String relationPath = parser.get("relationPath");
final String mergeRelPath = parser.get("mergeRelPath");
final String targetRelPath = parser.get("targetRelPath");
@ -50,7 +39,13 @@ public class SparkPropagateRelationsJob {
.as(Encoders.bean(Relation.class))
.where("relClass == 'merges'");
final Dataset<Relation> rels = spark.read().load(relationPath).as(Encoders.bean(Relation.class));
final Dataset<Relation> rels = spark
.read()
.load(relationPath)
.as(Encoders.kryo(Relation.class))
.map(
(MapFunction<Relation, Relation>) r -> r,
Encoders.bean(Relation.class));
final Dataset<Relation> firstJoin = rels
.joinWith(merge, merge.col("target").equalTo(rels.col("source")), "left_outer")
@ -58,9 +53,10 @@ public class SparkPropagateRelationsJob {
(MapFunction<Tuple2<Relation, Relation>, Relation>) r -> {
final Relation mergeRelation = r._2();
final Relation relation = r._1();
if (mergeRelation != null)
relation.setSource(mergeRelation.getSource());
if (relation.getDataInfo() == null)
relation.setDataInfo(OafUtils.generateDataInfo("0.9", false));
return relation;
},
Encoders.bean(Relation.class));
@ -75,38 +71,8 @@ public class SparkPropagateRelationsJob {
relation.setTarget(mergeRelation.getSource());
return relation;
},
Encoders.bean(Relation.class));
Encoders.kryo(Relation.class));
secondJoin.write().mode(SaveMode.Overwrite).save(targetRelPath);
}
private static boolean containsDedup(final String json) {
final String source = DHPUtils.getJPathString(SOURCEJSONPATH, json);
final String target = DHPUtils.getJPathString(TARGETJSONPATH, json);
return source.toLowerCase().contains("dedup") || target.toLowerCase().contains("dedup");
}
private static String replaceField(final String json, final String id, final FieldType type) {
ObjectMapper mapper = new ObjectMapper();
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
try {
Relation relation = mapper.readValue(json, Relation.class);
if (relation.getDataInfo() == null)
relation.setDataInfo(new DataInfo());
relation.getDataInfo().setDeletedbyinference(false);
switch (type) {
case SOURCE:
relation.setSource(id);
return mapper.writeValueAsString(relation);
case TARGET:
relation.setTarget(id);
return mapper.writeValueAsString(relation);
default:
throw new IllegalArgumentException("");
}
} catch (IOException e) {
throw new RuntimeException("unable to deserialize json relation: " + json, e);
}
}
}

@ -0,0 +1,75 @@
package eu.dnetlib.dedup.sx
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.{Oaf, OafEntity, Relation}
import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIUnknown, OafUtils}
import org.apache.commons.io.IOUtils
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.LoggerFactory
import org.apache.spark.sql.functions.col
object SparkUpdateEntityWithDedupInfo {
def main(args: Array[String]): Unit = {
val parser = new ArgumentApplicationParser(IOUtils.toString(SparkUpdateEntityWithDedupInfo.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/dedup/dedup_delete_by_inference_parameters.json")))
val logger = LoggerFactory.getLogger(SparkUpdateEntityWithDedupInfo.getClass)
parser.parseArgument(args)
val workingPath: String = parser.get("workingPath")
logger.info(s"Working dir path = $workingPath")
implicit val oafEncoder: Encoder[OafEntity] = Encoders.kryo[OafEntity]
implicit val relEncoder: Encoder[Relation] = Encoders.bean(classOf[Relation])
implicit val pubEncoder: Encoder[DLIPublication] = Encoders.kryo[DLIPublication]
implicit val datEncoder: Encoder[DLIDataset] = Encoders.kryo[DLIDataset]
implicit val unkEncoder: Encoder[DLIUnknown] = Encoders.kryo[DLIUnknown]
val spark: SparkSession = SparkSession
.builder()
.appName(SparkUpdateEntityWithDedupInfo.getClass.getSimpleName)
.master(parser.get("master"))
.getOrCreate()
val entityPath = parser.get("entityPath")
val mergeRelPath = parser.get("mergeRelPath")
val dedupRecordPath = parser.get("dedupRecordPath")
val entity = parser.get("entity")
val destination = parser.get("targetPath")
val mergedIds = spark.read.load(mergeRelPath).as[Relation]
.where("relClass == 'merges'")
.select(col("target"))
val entities: Dataset[(String, OafEntity)] = spark
.read
.load(entityPath).as[OafEntity]
.map(o => (o.getId, o))(Encoders.tuple(Encoders.STRING, oafEncoder))
val finalDataset:Dataset[OafEntity] = entities.joinWith(mergedIds, entities("_1").equalTo(mergedIds("target")), "left")
.map(k => {
val e: OafEntity = k._1._2
val t = k._2
if (t != null && t.getString(0).nonEmpty) {
if (e.getDataInfo == null) {
e.setDataInfo(OafUtils.generateDataInfo())
}
e.getDataInfo.setDeletedbyinference(true)
}
e
})
val dedupRecords :Dataset[OafEntity] = spark.read.load(dedupRecordPath).as[OafEntity]
finalDataset.union(dedupRecords)
.repartition(1200).write
.mode(SaveMode.Overwrite).save(destination)
}
}

@ -53,6 +53,7 @@
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--executor-cores=${sparkExecutorCores}
${sparkExtraOPT}
</spark-opts>
<arg>-mt</arg><arg>yarn-cluster</arg>
@ -77,6 +78,7 @@
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--executor-cores=${sparkExecutorCores}
${sparkExtraOPT}
</spark-opts>
<arg>-mt</arg><arg>yarn-cluster</arg>
@ -101,6 +103,7 @@
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--executor-cores=${sparkExecutorCores}
${sparkExtraOPT}
</spark-opts>
<arg>-mt</arg><arg>yarn-cluster</arg>
@ -125,6 +128,7 @@
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--executor-cores=${sparkExecutorCores}
${sparkExtraOPT}
</spark-opts>
<arg>-mt</arg><arg>yarn-cluster</arg>
@ -144,11 +148,12 @@
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Update ${entity} and add DedupRecord</name>
<class>eu.dnetlib.dedup.sx.SparkUpdateEntityJob</class>
<class>eu.dnetlib.dedup.sx.SparkUpdateEntityWithDedupInfo</class>
<jar>dhp-dedup-scholexplorer-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--executor-cores=${sparkExecutorCores}
${sparkExtraOPT}
</spark-opts>
<arg>-mt</arg><arg>yarn-cluster</arg>

@ -109,9 +109,9 @@ public class PropagationConstant {
}
public static String getConstraintList(String text, List<String> constraints) {
String ret = " and (" + text + constraints.get(0) + "'";
String ret = " and (" + text + constraints.get(0).toLowerCase() + "'";
for (int i = 1; i < constraints.size(); i++) {
ret += " OR " + text + constraints.get(i) + "'";
ret += " OR " + text + constraints.get(i).toLowerCase() + "'";
}
ret += ")";
return ret;

@ -110,13 +110,6 @@ public class CommunityConfigurationFactory {
}
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
final Node oacommunitynode = node.selectSingleNode("./oacommunity");
String oacommunity = null;
if (oacommunitynode != null) {
String tmp = oacommunitynode.getText();
if (StringUtils.isNotBlank(tmp))
oacommunity = tmp;
}
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>();
@ -127,11 +120,7 @@ public class CommunityConfigurationFactory {
zenodoCommunityList.add(zc);
}
if (oacommunity != null) {
ZenodoCommunity zc = new ZenodoCommunity();
zc.setZenodoCommunityId(oacommunity);
zenodoCommunityList.add(zc);
}
log.info("size of the zenodo community list " + zenodoCommunityList.size());
return zenodoCommunityList;
}

@ -44,7 +44,7 @@ public class Provider implements Serializable {
}
private void setSelCriteria(String json, VerbResolver resolver) {
log.info("Selection constraints for datasource = " + json);
log.debug("Selection constraints for datasource = " + json);
selectionConstraints = new Gson().fromJson(json, SelectionConstraints.class);
selectionConstraints.setSelection(resolver);
@ -54,7 +54,7 @@ public class Provider implements Serializable {
try {
setSelCriteria(n.getText(), resolver);
} catch (Exception e) {
log.info("not set selection criteria... ");
log.debug("not set selection criteria... ");
selectionConstraints = null;
}
}

@ -17,7 +17,9 @@ public class QueryInformationSystem {
+ " let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept "
+ " let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept "
+ " let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept "
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] "
+
"let $zenodo := $x//param[./@name='zenodoCommunity']/text() "
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden' "
+ " return "
+ " <community> "
+ " { $x//CONFIGURATION/context/@id} "
@ -38,8 +40,15 @@ public class QueryInformationSystem {
+ " {$d/param[./@name='selcriteria']/text()} "
+ " </selcriteria> "
+ " </datasource> } "
+ " </datasources> "
+ " <zenodocommunities> "
+ " </datasources> " +
" <zenodocommunities> " +
"{for $zc in $zenodo " +
"return " +
"<zenodocommunity> " +
"<zenodoid> " +
"{$zc} " +
"</zenodoid> " +
"</zenodocommunity>}"
+ " {for $zc in $communities "
+ " return "
+ " <zenodocommunity> "

@ -20,8 +20,6 @@ import eu.dnetlib.dhp.schema.oaf.*;
/** Created by miriam on 02/08/2018. */
public class ResultTagger implements Serializable {
private String trust = "0.8";
private boolean clearContext(Result result) {
int tmp = result.getContext().size();
List<Context> clist = result
@ -71,10 +69,10 @@ public class ResultTagger implements Serializable {
// tagging for Subject
final Set<String> subjects = new HashSet<>();
Optional<List<StructuredProperty>> oresultsubj = Optional.ofNullable(result.getSubject());
if (oresultsubj.isPresent()) {
oresultsubj
.get()
if (Objects.nonNull(result.getSubject())) {
result
.getSubject()
.stream()
.map(subject -> subject.getValue())
.filter(StringUtils::isNotBlank)
@ -90,15 +88,23 @@ public class ResultTagger implements Serializable {
final Set<String> datasources = new HashSet<>();
final Set<String> tmp = new HashSet<>();
Optional<List<Instance>> oresultinstance = Optional.ofNullable(result.getInstance());
if (oresultinstance.isPresent()) {
for (Instance i : oresultinstance.get()) {
tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
if (Objects.nonNull(result.getInstance())) {
for (Instance i : result.getInstance()) {
if (Objects.nonNull(i.getCollectedfrom())) {
if (Objects.nonNull(i.getCollectedfrom().getKey())) {
tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
}
}
if (Objects.nonNull(i.getHostedby())) {
if (Objects.nonNull(i.getHostedby().getKey())) {
tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
}
}
}
oresultinstance
.get()
result
.getInstance()
.stream()
.map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey()))
.flatMap(p -> Stream.of(p.getFst(), p.getSnd()))
@ -163,21 +169,24 @@ public class ResultTagger implements Serializable {
getDataInfo(
BULKTAG_DATA_INFO_TYPE,
CLASS_ID_SUBJECT,
CLASS_NAME_BULKTAG_SUBJECT));
CLASS_NAME_BULKTAG_SUBJECT,
TAGGING_TRUST));
if (datasources.contains(c.getId()))
dataInfoList
.add(
getDataInfo(
BULKTAG_DATA_INFO_TYPE,
CLASS_ID_DATASOURCE,
CLASS_NAME_BULKTAG_DATASOURCE));
CLASS_NAME_BULKTAG_DATASOURCE,
TAGGING_TRUST));
if (czenodo.contains(c.getId()))
dataInfoList
.add(
getDataInfo(
BULKTAG_DATA_INFO_TYPE,
CLASS_ID_CZENODO,
CLASS_NAME_BULKTAG_ZENODO));
CLASS_NAME_BULKTAG_ZENODO,
TAGGING_TRUST));
}
return c;
})
@ -203,21 +212,24 @@ public class ResultTagger implements Serializable {
getDataInfo(
BULKTAG_DATA_INFO_TYPE,
CLASS_ID_SUBJECT,
CLASS_NAME_BULKTAG_SUBJECT));
CLASS_NAME_BULKTAG_SUBJECT,
TAGGING_TRUST));
if (datasources.contains(c))
dataInfoList
.add(
getDataInfo(
BULKTAG_DATA_INFO_TYPE,
CLASS_ID_DATASOURCE,
CLASS_NAME_BULKTAG_DATASOURCE));
CLASS_NAME_BULKTAG_DATASOURCE,
TAGGING_TRUST));
if (czenodo.contains(c))
dataInfoList
.add(
getDataInfo(
BULKTAG_DATA_INFO_TYPE,
CLASS_ID_CZENODO,
CLASS_NAME_BULKTAG_ZENODO));
CLASS_NAME_BULKTAG_ZENODO,
TAGGING_TRUST));
context.setDataInfo(dataInfoList);
return context;
})
@ -228,11 +240,12 @@ public class ResultTagger implements Serializable {
}
public static DataInfo getDataInfo(
String inference_provenance, String inference_class_id, String inference_class_name) {
String inference_provenance, String inference_class_id, String inference_class_name, String trust) {
DataInfo di = new DataInfo();
di.setInferred(true);
di.setInferenceprovenance(inference_provenance);
di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name));
di.setTrust(trust);
return di;
}

@ -14,4 +14,6 @@ public class TaggingConstants {
public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
public static final String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
public static final String TAGGING_TRUST = "0.8";
}

@ -96,27 +96,6 @@ public class PrepareDatasourceCountryAssociation {
relation.createOrReplaceTempView("relation");
organization.createOrReplaceTempView("organization");
// String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country "
// + "FROM ( SELECT id "
// + " FROM datasource "
// + " WHERE (datainfo.deletedbyinference = false "
// + whitelisted
// + ") "
// + getConstraintList("datasourcetype.classid = '", allowedtypes)
// + ") d "
// + "JOIN ( SELECT source, target "
// + " FROM relation "
// + " WHERE relclass = '"
// + ModelConstants.IS_PROVIDED_BY
// + "' "
// + " AND datainfo.deletedbyinference = false ) rel "
// + "ON d.id = rel.source "
// + "JOIN (SELECT id, country "
// + " FROM organization "
// + " WHERE datainfo.deletedbyinference = false "
// + " AND length(country.classid) > 0) o "
// + "ON o.id = rel.target";
String query = "SELECT source dataSourceId, " +
"named_struct('classid', country.classid, 'classname', country.classname) country " +
"FROM datasource d " +
@ -125,7 +104,7 @@ public class PrepareDatasourceCountryAssociation {
"JOIN organization o " +
"ON o.id = rel.target " +
"WHERE rel.datainfo.deletedbyinference = false " +
"and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" +
"and lower(rel.relclass) = '" + ModelConstants.IS_PROVIDED_BY.toLowerCase() + "'" +
"and o.datainfo.deletedbyinference = false " +
"and length(o.country.classid) > 0 " +
"and (" + allowed + " or " + whitelisted + ")";

@ -102,15 +102,17 @@ public class PrepareResultOrcidAssociationStep1 {
+ " FROM result "
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
+ " WHERE MyP.qualifier.classid = 'ORCID') tmp "
+ " WHERE lower(MyP.qualifier.classid) = 'orcid') tmp "
+ " GROUP BY id) r_t "
+ " JOIN ("
+ " SELECT source, target "
+ " FROM relation "
+ " WHERE datainfo.deletedbyinference = false "
+ getConstraintList(" relclass = '", allowedsemrel)
+ getConstraintList(" lower(relclass) = '", allowedsemrel)
+ " ) rel_rel "
+ " ON source = id";
log.info("executedQuery: {}", query);
spark
.sql(query)
.as(Encoders.bean(ResultOrcidList.class))

@ -85,8 +85,8 @@ public class PrepareProjectResultsAssociation {
String resproj_relation_query = "SELECT source, target "
+ " FROM relation "
+ " WHERE datainfo.deletedbyinference = false "
+ " AND relClass = '"
+ ModelConstants.IS_PRODUCED_BY
+ " AND lower(relClass) = '"
+ ModelConstants.IS_PRODUCED_BY.toLowerCase()
+ "'";
Dataset<Row> resproj_relation = spark.sql(resproj_relation_query);
@ -98,7 +98,7 @@ public class PrepareProjectResultsAssociation {
+ " FROM (SELECT source, target "
+ " FROM relation "
+ " WHERE datainfo.deletedbyinference = false "
+ getConstraintList(" relClass = '", allowedsemrel)
+ getConstraintList(" lower(relClass) = '", allowedsemrel)
+ " ) r1"
+ " JOIN resproj_relation r2 "
+ " ON r1.source = r2.source "

@ -76,14 +76,14 @@ public class PrepareResultCommunitySet {
+ "FROM (SELECT source, target "
+ " FROM relation "
+ " WHERE datainfo.deletedbyinference = false "
+ " AND relClass = '"
+ ModelConstants.HAS_AUTHOR_INSTITUTION
+ " AND lower(relClass) = '"
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
+ "') result_organization "
+ "LEFT JOIN (SELECT source, collect_set(target) org_set "
+ " FROM relation "
+ " WHERE datainfo.deletedbyinference = false "
+ " AND relClass = '"
+ ModelConstants.MERGES
+ " AND lower(relClass) = '"
+ ModelConstants.MERGES.toLowerCase()
+ "' "
+ " GROUP BY source) organization_organization "
+ "ON result_organization.target = organization_organization.source ";

@ -144,8 +144,8 @@ public class PrepareResultCommunitySetStep1 {
String resultContextQuery = String
.format(
RESULT_CONTEXT_QUERY_TEMPLATE,
getConstraintList(" co.id = '", communityIdList),
getConstraintList(" relClass = '", allowedsemrel));
getConstraintList(" lower(co.id) = '", communityIdList),
getConstraintList(" lower(relClass) = '", allowedsemrel));
Dataset<Row> result_context = spark.sql(resultContextQuery);
result_context.createOrReplaceTempView("result_context");

@ -91,8 +91,8 @@ public class PrepareResultInstRepoAssociation {
+ "AND datainfo.deletedbyinference = false ) d "
+ "JOIN ( SELECT source, target "
+ "FROM relation "
+ "WHERE relclass = '"
+ ModelConstants.IS_PROVIDED_BY
+ "WHERE lower(relclass) = '"
+ ModelConstants.IS_PROVIDED_BY.toLowerCase()
+ "' "
+ "AND datainfo.deletedbyinference = false ) rel "
+ "ON d.id = rel.source ";
@ -111,8 +111,8 @@ public class PrepareResultInstRepoAssociation {
String query = "Select source resultId, collect_set(target) organizationSet "
+ "from relation "
+ "where datainfo.deletedbyinference = false "
+ "and relClass = '"
+ ModelConstants.HAS_AUTHOR_INSTITUTION
+ "and lower(relClass) = '"
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
+ "' "
+ "group by source";

@ -266,7 +266,6 @@
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
@ -298,7 +297,6 @@
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
@ -330,7 +328,6 @@
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
@ -362,7 +359,6 @@
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>

@ -176,7 +176,6 @@
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
</spark>
<ok to="wait2"/>
@ -206,7 +205,6 @@
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
</spark>
<ok to="wait2"/>
@ -236,7 +234,6 @@
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
</spark>
<ok to="wait2"/>
@ -266,7 +263,6 @@
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
</spark>
<ok to="wait2"/>

@ -106,12 +106,6 @@
<subject>aqua</subject>
<subject>sea</subject>
</subjects>
<providers>
<datasource>
<openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId>
<selcriteria/>
</datasource>
</providers>
<zenodocommunities/>
</community>
<community id="aginfra">
@ -163,7 +157,11 @@
<zenodocommunities/>
</community>
<community id="clarin">
<oacommunity>oac_clarin</oacommunity>
<zenodocommunities>
<zenodocommunity>
<zenodoid>oac_clarin</zenodoid>
</zenodocommunity>
</zenodocommunities>
<subjects/>
<providers>
<datasource>

@ -257,6 +257,9 @@
<zenodoid>bodhgaya</zenodoid>
<selcriteria/>
</zenodocommunity>
<zenodocommunity>
<zenodoid>oac_dh-ch</zenodoid>
</zenodocommunity>
</zenodocommunities>
<organizations/>
</community>

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save