Compare commits

...

57 Commits

Author SHA1 Message Date
Miriam Baglioni 10b60ede5d merge branch with master 2020-12-16 10:53:43 +01:00
Miriam Baglioni 8b83d47bc1 - 2020-12-16 10:49:08 +01:00
Miriam Baglioni 5df74ebe9c - 2020-12-15 18:59:23 +01:00
Miriam Baglioni 24bbd30e80 merge branch with master 2020-12-14 13:35:27 +01:00
Miriam Baglioni 9d5e6eaf3f - 2020-12-14 13:34:34 +01:00
Miriam Baglioni 5983f6ffc6 - 2020-12-14 13:34:12 +01:00
Miriam Baglioni 818665451f merge with master 2020-12-11 12:25:18 +01:00
Miriam Baglioni 6b8e947bcf - 2020-12-03 14:51:47 +01:00
Miriam Baglioni f6b7c297a8 merge with master 2020-12-03 13:46:15 +01:00
Miriam Baglioni 6962cd7c18 Merge branch 'd4science' of code-repo.d4science.org:miriam.baglioni/dnet-hadoop into d4science 2020-07-10 19:40:02 +02:00
Miriam Baglioni 00018711d8 - 2020-07-10 19:38:13 +02:00
Miriam Baglioni 4e494f5152 added check to verify the used record is not empty 2020-07-10 17:00:30 +02:00
Miriam Baglioni d77d213d7c - 2020-07-09 11:46:32 +02:00
Miriam Baglioni b1dad7959c - 2020-07-08 10:38:52 +02:00
Miriam Baglioni 7ddd8590d0 changed library to OkHttp 2020-07-03 15:17:11 +02:00
Alessia Bardi a9dc05d8c7 updated list of duplicates to purge 2020-07-02 17:53:49 +02:00
Alessia Bardi d0f9891355 purg arg is the actual name of the item in the catalogue, not the OpenAIRE objIdentifier 2020-07-02 16:04:48 +02:00
Alessia Bardi 3600d39f96 list of ids to purge because duplicates in the gateway 2020-07-02 16:01:24 +02:00
Alessia Bardi 0c9539a301 Merge branch 'd4science' of https://code-repo.d4science.org/miriam.baglioni/dnet-hadoop into d4science 2020-07-02 15:50:18 +02:00
Alessia Bardi 3d3178d4e8 changes so the class compiles on my intellij 2020-07-02 15:48:48 +02:00
Miriam Baglioni 1c212cb6b9 new step to the oozie workflow 2020-07-02 14:24:13 +02:00
Miriam Baglioni a7b26d7f07 Merge branch 'd4science' of code-repo.d4science.org:miriam.baglioni/dnet-hadoop into d4science 2020-07-02 14:23:08 +02:00
Miriam Baglioni 0d10e3bd22 modified the mapping to include the groups. Added step to workflow to send directly to the catalogue 2020-07-02 14:22:20 +02:00
Miriam Baglioni 2d380aea1d added logic to directly send records to catalogue 2020-07-02 11:12:14 +02:00
Miriam Baglioni 566a763175 - 2020-07-01 18:13:48 +02:00
Miriam Baglioni daa5d933e0 modification to the model due to change in the mapping 2020-07-01 17:43:23 +02:00
Miriam Baglioni e71e857e48 removed test 2020-07-01 17:42:32 +02:00
Miriam Baglioni 9864bff488 mapping adaptations 2020-07-01 17:41:58 +02:00
Alessia Bardi 2c3b92ff10 ckan groups for access rights and data sources 2020-07-01 16:49:33 +02:00
Miriam Baglioni 42ee1ef284 Merge branch 'd4science' of code-repo.d4science.org:miriam.baglioni/dnet-hadoop into d4science 2020-06-30 14:09:04 +02:00
Alessia Bardi 474ae69df8 use the same name generation procedure of the mapping 2020-06-24 12:59:03 +02:00
Miriam Baglioni 952a4a4482 - 2020-06-24 10:59:58 +02:00
Miriam Baglioni 563378ce3f changed the mapping and added new resources for testing 2020-06-23 15:30:34 +02:00
Miriam Baglioni d6838e18e6 Merge branch 'd4science' of code-repo.d4science.org:miriam.baglioni/dnet-hadoop into d4science 2020-06-23 11:57:30 +02:00
Miriam Baglioni de62582c28 new test resource 2020-06-23 11:57:25 +02:00
Alessia Bardi 743d948d1c print logs 2020-06-23 11:52:19 +02:00
Miriam Baglioni a2aa3c5b67 Merge branch 'd4science' of code-repo.d4science.org:miriam.baglioni/dnet-hadoop into d4science 2020-06-23 11:36:39 +02:00
Alessia Bardi fcabee9242 last wrong assert fixed 2020-06-23 11:36:04 +02:00
Miriam Baglioni 2d9811ac4c Merge branch 'd4science' of code-repo.d4science.org:miriam.baglioni/dnet-hadoop into d4science 2020-06-23 11:31:16 +02:00
Alessia Bardi 71ef7d9e66 using proper assertions 2020-06-23 11:30:41 +02:00
Miriam Baglioni 60a3206de5 fixed a typo in the name of a filed 2020-06-23 11:19:23 +02:00
Miriam Baglioni f12b1ede24 Merge branch 'd4science' of code-repo.d4science.org:miriam.baglioni/dnet-hadoop into d4science 2020-06-23 11:16:25 +02:00
Alessia Bardi b762c28cb6 moved test to proper package 2020-06-23 11:15:02 +02:00
Miriam Baglioni 844948f3e0 real output from the cluster 2020-06-23 11:08:43 +02:00
Miriam Baglioni 33e2ebeaaa fix to the mapper, and changed of the json for testing 2020-06-23 11:07:42 +02:00
Alessia Bardi a27b93859e method to purge all items in the d4science catalog 2020-06-22 19:25:25 +02:00
Miriam Baglioni 3da12be81f - 2020-06-22 19:14:06 +02:00
Alessia Bardi d9c07eb800 GCat API and test - disabled 2020-06-22 18:49:04 +02:00
Miriam Baglioni 1566fd590e added set of same type of entries -url cf hb- before creating extras to have them distinct 2020-06-22 17:45:38 +02:00
Miriam Baglioni 004bf225cb added repartition to one before writing so as to have just one file for each community product 2020-06-22 17:38:02 +02:00
Miriam Baglioni e983d02c1c added check to fix issue when entry is present but value it is not 2020-06-22 17:37:30 +02:00
Miriam Baglioni b570f011d1 changed the workflow name 2020-06-22 16:53:32 +02:00
Miriam Baglioni d133368d2d merge branch with fork master 2020-06-22 16:25:56 +02:00
Miriam Baglioni c84736fa56 the general entry for the catalogue 2020-06-22 16:25:01 +02:00
Miriam Baglioni 25a7205549 merge branch with fork master 2020-06-22 16:23:23 +02:00
Miriam Baglioni 06b03840bd new classes for Gcat catalogue, Mapping to the catalogue, spark code and workflow definition 2020-06-22 16:23:00 +02:00
Alessia Bardi ec19fcace0 API for D4science GCat 2020-06-19 17:37:22 +02:00
27 changed files with 2095 additions and 0 deletions

@@ -0,0 +1,92 @@
package eu.dnetlib.dhp.schema.dump.gcat;
import java.io.Serializable;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
public class CatalogueEntry implements Serializable {
private String name; // openaire id with the '|' prefix removed and ':' replaced by '-' (see Mapper)
private String license_id; // default "notspecified",
private String title; // title.maintitle
private String notes; // description.value (the first description; any further descriptions go into the extras)
private String url; // the url of the resource in the openaire dashboard
private String version; // valid for datasets
private List<Tag> tags; // subject and keywords
private List<Group> groups; // access and publishers
private List<KeyValue> extras;
public List<Group> getGroups() {
return groups;
}
public void setGroups(List<Group> groups) {
this.groups = groups;
}
public List<Tag> getTags() {
return tags;
}
public void setTags(List<Tag> tags) {
this.tags = tags;
}
public String getVersion() {
return version;
}
public void setVersion(String version) {
this.version = version;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getLicense_id() {
return license_id;
}
public void setLicense_id(String license_id) {
this.license_id = license_id;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getNotes() {
return notes;
}
public void setNotes(String notes) {
this.notes = notes;
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public List<KeyValue> getExtras() {
return extras;
}
public void setExtras(List<KeyValue> extras) {
this.extras = extras;
}
}
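A minimal sketch (not part of this diff) of how a CatalogueEntry could be filled and serialized into the one-line JSON record the catalogue receives; Gson is the same serializer used by GCatAPIClient below, and all values are illustrative, loosely based on the sample record at the end of this page.
import java.util.Arrays;
import com.google.gson.Gson;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
CatalogueEntry entry = new CatalogueEntry();
entry.setName("dedup_wf_001--c4634a42d4b98e594e0796a41b47ec61"); // ':' already replaced by '-'
entry.setLicense_id("notspecified");
entry.setTitle("RISIS-KNOWMAK NUTS adapted classification");
entry.setNotes("Correspondence table between the EUROSTAT NUTS3 classification and the RISIS-KNOWMAK regional classification.");
entry.setUrl("https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::c4634a42d4b98e594e0796a41b47ec61");
entry.setVersion("");
entry.setTags(Arrays.asList(Tag.newInstance("knowmak")));            // illustrative tag
entry.setGroups(Arrays.asList(Group.newInstance(null, "open"), Group.newInstance(null, "zenodo")));
entry.setExtras(Arrays.asList(KeyValue.newInstance("system:type", "dataset")));
String json = new Gson().toJson(entry); // one catalogue item per line, as produced by the Spark dump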

@@ -0,0 +1,37 @@
package eu.dnetlib.dhp.schema.dump.gcat;
import java.io.Serializable;
public class Group implements Serializable {
private String id;
private String name;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public static Group newInstance(String id, String name) {
Group g = new Group();
if (id != null) {
g.id = id;
}
if (name != null) {
g.name = name;
}
return g;
}
}

@@ -0,0 +1,24 @@
package eu.dnetlib.dhp.schema.dump.gcat;
import java.io.Serializable;
public class Tag implements Serializable {
private String name;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public static Tag newInstance(String n) {
Tag t = new Tag();
t.name = n;
return t;
}
}

@@ -7,8 +7,14 @@ import com.google.common.collect.Maps;
public class Constants {
public static String PUBLICATION_URL = "https://science-innovation-policy.openaire.eu/search/publication?articleId=";
public static String DATASET_URL = "https://science-innovation-policy.openaire.eu/search/dataset?datasetId=";
public static String SOFTWARE_URL = "https://science-innovation-policy.openaire.eu/search/software?softwareId=";
public static String ORP_URL = "https://science-innovation-policy.openaire.eu/search/other?orpId=";
public static String DEFAULT_LICENCE_ID = "notspecified";
public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
public static final Map<String, String> gcatCatalogue = Maps.newHashMap();
public static final String INFERRED = "Inferred by OpenAIRE";
@@ -26,6 +32,8 @@ public class Constants {
public static String ORCID = "orcid";
public static String UNKNOWN = "unknown";
static {
accessRightsCoarMap.put("OPEN", "c_abf2");
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
@@ -41,6 +49,16 @@ public class Constants {
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
}
static {
gcatCatalogue.put("OPEN", "OPEN");
gcatCatalogue.put("RESTRICTED", "RESTRICTED");
gcatCatalogue.put("OPEN SOURCE", "OPEN");
gcatCatalogue.put("CLOSED", "CLOSED");
gcatCatalogue.put("EMBARGO", "EMBARGO");
gcatCatalogue.put("UNKNOWN", "UNKNOWN");
gcatCatalogue.put("OTHER", "UNKNOWN");
}
public enum DUMPTYPE {
COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder");
@@ -53,5 +71,6 @@ public class Constants {
public String getType() {
return type;
}
}
}

@@ -0,0 +1,177 @@
package eu.dnetlib.dhp.oa.graph.dump.gcat;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import com.google.gson.Gson;
import okhttp3.*;
/**
* Created by Alessia Bardi on 19/06/2020.
*
* @author Alessia Bardi
*/
public class GCatAPIClient {
private static final Log log = LogFactory.getLog(GCatAPIClient.class);
public static final int BULK_SIZE = 100;
private String gcatBaseURL;
private final String itemPath = "items";
private String applicationToken;
private static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
public GCatAPIClient() {
}
/**
* Publish the given JSON record to the D4Science catalogue as an item.
*
* @param jsonMetadata the whole published json record
* @return the HTTP status code of the request
* @throws IOException
*/
public int publish(final String jsonMetadata) throws IOException, URISyntaxException {
OkHttpClient httpClient = new OkHttpClient();
RequestBody body = RequestBody.create(jsonMetadata, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(getGcatBaseURL() + itemPath)
.header("gcube-token", getApplicationToken())
.addHeader("Content-Type", "application/json")
.addHeader("Accept", "application/json")
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (log.isDebugEnabled()) {
log.debug(response.code());
System.out.println(response.code());
log.debug(response.body().string());
}
if (!response.isSuccessful()) {
throw new IOException("Unexpected code " + response);
}
return response.code();
}
}
/**
* List items in the catalogue
*
* @param offset offset
* @param limit limit
* @return list of catalogue item names
* @throws IOException
* @throws URISyntaxException
*/
public List<String> list(final int offset, final int limit) throws IOException, URISyntaxException {
OkHttpClient httpClient = new OkHttpClient();
Request request = new Request.Builder()
.url(getGcatBaseURL() + itemPath + "?offset=" + offset + "&limit=" + limit)
.header("gcube-token", getApplicationToken())
.addHeader("Content-Type", "application/json")
.addHeader("Accept", "application/json")
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
int status = response.code();
if (status >= 200 && status < 300) {
String entity = response.body().string();
return entity != null ? new Gson().fromJson(entity, List.class) : null;
} else {
throw new ClientProtocolException("Unexpected response status: " + status);
}
}
}
public boolean purge(final String resCatName) throws IOException, URISyntaxException {
try (CloseableHttpClient client = HttpClients.createDefault()) {
URIBuilder builder = new URIBuilder(getGcatBaseURL() + itemPath + "/" + resCatName)
.addParameter("purge", "true");
URI uri = builder.build();
System.out.println(uri.toString());
HttpDelete del = new HttpDelete(uri);
del.setHeader("gcube-token", getApplicationToken());
del.addHeader("Content-Type", "application/json");
del.addHeader("Accept", "application/json");
HttpResponse response = client.execute(del);
if (log.isDebugEnabled()) {
log.debug(response.getStatusLine());
}
return response.getStatusLine().getStatusCode() == HttpStatus.SC_NO_CONTENT;
}
}
public int purgeAll() throws IOException, URISyntaxException {
int count = 0;
int deleted = 0;
int failed = 0;
List<String> list = list(0, BULK_SIZE);
do {
for (String itemName : list) {
count++;
if (purge(itemName))
deleted++;
else {
failed++;
log.warn("Deletion of item " + itemName + " failed");
}
}
list = list(0, BULK_SIZE);
} while (list.size() > 0);
log.info(String.format("PurgeAll completed: total = %d; deleted = %d; failed = %d", count, deleted, failed));
return deleted;
}
public String getGcatBaseURL() {
return gcatBaseURL;
}
public void setGcatBaseURL(String gcatBaseURL) {
this.gcatBaseURL = gcatBaseURL;
}
public String getApplicationToken() {
return applicationToken;
}
public void setApplicationToken(String applicationToken) {
this.applicationToken = applicationToken;
}
}
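A minimal usage sketch (not part of this diff), assuming a valid gcube token; the base URL mirrors the one used in GCatAPIClientTest further down, jsonMetadata stands for a CatalogueEntry serialized with Gson, and imports are omitted for brevity.
GCatAPIClient client = new GCatAPIClient();
client.setGcatBaseURL("https://gcat.d4science.org/gcat/"); // same base URL as in the tests below
client.setApplicationToken("<gcube-token>");               // placeholder, not a real token
String jsonMetadata = "{ ... }";               // a serialized CatalogueEntry, one per line in the dump output
int status = client.publish(jsonMetadata);     // 201 (HttpStatus.SC_CREATED) when the item is created
List<String> names = client.list(0, 10);       // names of the first 10 catalogue items
boolean purged = client.purge(names.get(0));   // true when the item is deleted (204 No Content)
// publish, list and purge all declare IOException and URISyntaxException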

@@ -0,0 +1,403 @@
package eu.dnetlib.dhp.oa.graph.dump.gcat;
import java.io.Serializable;
import java.io.StringWriter;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.util.LongAccumulator;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry;
import eu.dnetlib.dhp.schema.dump.gcat.Group;
import eu.dnetlib.dhp.schema.dump.gcat.Tag;
import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class Mapper implements Serializable {
private static final List<String> publishers = Arrays
.asList("zenodo", "hal", "figshare", "digital-csic", "dans", "datacite");
private static final List<String> access = Arrays.asList("open", "closed", "embargoed", "restricted");
public static <I extends eu.dnetlib.dhp.schema.oaf.Result> CatalogueEntry map(I input,
Map<String, LongAccumulator> map) {
final CatalogueEntry out = new CatalogueEntry();
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
List<KeyValue> externals = new ArrayList<>();
Set<String> urlSet = new HashSet<>();
Set<String> cfSet = new HashSet<>();
Set<String> hbSet = new HashSet<>();
Set<String> countrySet = new HashSet<>();
Set<String> groups = new HashSet<>();
List<Group> groupList = new ArrayList<>();
if (ort.isPresent()) {
switch (ort.get().getClassid()) {
case "publication":
Optional<Journal> oJournal = Optional
.ofNullable(((Publication) input).getJournal());
if (oJournal.isPresent()) {
Journal value = oJournal.get();
externals
.add(
KeyValue
.newInstance(
"Risis2_Publishing:Journal",
// "Journal",
value.getName() + ", " + value.getVol() + ", " + value.getIss()));
}
out.setUrl(Constants.PUBLICATION_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("system:type", "publication"));
break;
case "dataset":
eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
out
.setVersion(
Optional
.ofNullable(id.getVersion())
.map(v -> v.getValue())
.orElse(""));
out.setUrl(Constants.DATASET_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("system:type", "dataset"));
break;
case "software":
eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
Optional
.ofNullable(is.getCodeRepositoryUrl())
.ifPresent(value -> urlSet.add(value.getValue()));
Optional
.ofNullable(is.getDocumentationUrl())
.ifPresent(value -> value.forEach(v -> urlSet.add(v.getValue())));
externals
.add(
KeyValue
.newInstance(
"Programming Language", Optional
.ofNullable(is.getProgrammingLanguage())
.map(v -> v.getClassname())
.orElse("")));
// .ifPresent(
// value -> externals.add(KeyValue.newInstance("Programming Language", value.getClassname())));
out.setUrl(Constants.SOFTWARE_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("system:type", "software"));
break;
case "other":
out.setUrl(Constants.ORP_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("system:type", "other"));
break;
}
out.setLicense_id(Constants.DEFAULT_LICENCE_ID);
Optional<List<Author>> oauth = Optional
.ofNullable(input.getAuthor());
List<String> authList = new ArrayList<>();
if (oauth.isPresent()) {
authList = oauth.get().stream().map(v -> getAuthor(v)).collect(Collectors.toList());
}
if (authList.size() > 0) {
authList.forEach(a -> externals.add(KeyValue.newInstance("Risis2_Attribution:Author", a)));
// authList.forEach(a -> externals.add(KeyValue.newInstance("Author", a)));
}
String accessr = Optional
.ofNullable(input.getBestaccessright())
.map(
value -> value.getClassid())
.orElse("");
if (access.contains(accessr.toLowerCase())) {
groupList.add(Group.newInstance(null, accessr.toLowerCase().trim()));
}
if (!accessr.equals("")) {
externals
.add(
KeyValue
.newInstance(
"AccessMode:Access Right", input.getBestaccessright().getClassname()));
}
Optional
.ofNullable(input.getCollectedfrom())
.ifPresent(
value -> value
.forEach(v -> cfSet.add(v.getValue())));
Optional<List<Field<String>>> ocont = Optional
.ofNullable(input.getContributor());
if (ocont.isPresent()) {
ocont
.get()
.forEach(
v -> externals
.add(
KeyValue
.newInstance(
"Risis2_Attribution:Contributor",
v.getValue())));
// .forEach(v -> externals.add(KeyValue.newInstance("Contributor", v.getValue())));
}
Optional
.ofNullable(input.getCountry())
.ifPresent(
value -> value
.forEach(v -> countrySet.add(v.getClassname())));
Optional
.ofNullable(input.getDescription())
.ifPresent(value ->
getDescription(out, externals, value));
externals
.add(
KeyValue
.newInstance(
"AccessMode:Embargo End Date", Optional
.ofNullable(input.getEmbargoenddate())
.map(value -> value.getValue())
.orElse("")));
final Set<String> formatSet = new HashSet<>();
Optional
.ofNullable(input.getFormat())
.ifPresent(value -> value.forEach(f -> formatSet.add(f.getValue())));
String id = input.getId();
// id = id.substring(0, id.lastIndexOf(":") + 1) + "a" + id.substring(id.lastIndexOf(":") + 1);
out.setName(id.substring(id.indexOf('|') + 1).replace(":", "-"));
final Set<String> itSet = new HashSet<>();
Optional
.ofNullable(input.getInstance())
.ifPresent(
value -> value
.forEach(v -> {
Optional
.ofNullable(v.getHostedby())
.ifPresent(hb -> hbSet.add(hb.getValue()));
Optional
.ofNullable(v.getUrl())
.ifPresent(u -> u.forEach(url -> urlSet.add(url)));
Optional
.ofNullable(v.getInstancetype())
.ifPresent(it -> itSet.add(it.getClassname()));
}));
externals
.add(
KeyValue
.newInstance(
"Language", Optional
.ofNullable(input.getLanguage())
.map(value -> {
String lang = value.getClassname();
if(lang.toLowerCase().equals(Constants.UNKNOWN)){
return "";
}
return lang;
})
.orElse("")));
List<StructuredProperty> iTitle = Optional
.ofNullable(input.getTitle())
.map(
value -> value
.stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
.collect(Collectors.toList()))
.orElse(new ArrayList<>());
if (iTitle.size() > 0) {
out.setTitle(textReplacement(iTitle.get(0).getValue()));
} else {
out.setTitle("");
}
Optional
.ofNullable(input.getPid())
.ifPresent(
value -> value
.forEach(
v -> {
if (v.getQualifier().getClassid().equalsIgnoreCase("DOI")) {
externals
.add(
KeyValue
.newInstance("Identity:PID", "https://www.doi.org/" + v.getValue()));
} else {
externals
.add(
KeyValue
.newInstance(
"Identity:PID",
v.getQualifier().getClassid() + ":" + v.getValue()));
}
}));
externals
.add(
KeyValue
.newInstance(
"Risis2_Publishing:Publication Date", Optional
// "Publication Date", Optional
.ofNullable(input.getDateofacceptance())
.map(value -> value.getValue())
.orElse("")));
String publisher = Optional
.ofNullable(input.getPublisher())
.map(value -> value.getValue())
.orElse("");
if (!publisher.equals("")) {
groups.add(publisher.toLowerCase().replace(".", "-"));
externals
.add(
KeyValue
.newInstance(
"Risis2_Publishing:Publisher", publisher));
}
Set<String> tagsSet = new HashSet<>();
Optional
.ofNullable(input.getSubject())
.ifPresent(
value -> value
.forEach(
s -> {
String classId = s.getQualifier().getClassid();
String prefix = "";
if (!(classId.equals("keyword") || classId.toLowerCase().equals(Constants.UNKNOWN)) &&
StringUtils.isNotEmpty(classId)) {
prefix = classId + ".";
}
String tag = prefix + s.getValue();
tag = tagReplacements(tag);
tagsSet.add(tag);
}));
cfSet.remove("Unknown Repository");
externals.add(KeyValue.newInstance("Risis2_Publishing:Collected From", getListOfValues(cfSet)));
hbSet.remove("Unknown Repository");
externals.add(KeyValue.newInstance("Risis2_Publishing:Hosted By", getListOfValues(hbSet)));
cfSet.forEach(cf -> groups.add(cf.toLowerCase().replace(".", "-")));
hbSet.forEach(hb -> groups.add(hb.toLowerCase().replace(".", "-")));
groups.forEach(g -> {
if (publishers.contains(g.trim())) {
groupList.add(Group.newInstance(null, g.trim()));
}
});
out.setGroups(groupList);
urlSet.stream().forEach(url -> externals.add(KeyValue.newInstance("Identity:URL", url)));
externals.add(KeyValue.newInstance("Country", getListOfValues(countrySet)));
externals.add(KeyValue.newInstance("Format", getListOfValues(formatSet)));
externals.add(KeyValue.newInstance("Resource Type", getListOfValues(itSet)));
List<Tag> tags = new ArrayList<>();
List<String> kws = new ArrayList<>();
tagsSet.forEach(tag -> {
if (tag.endsWith(",") || tag.endsWith(";") || tag.endsWith(".")) {
tag = tag.substring(0, tag.length() - 1);
}
if (tag.matches("^[a-zA-Z0-9_. -]*$") && tag.length() > 1 && tag.length() < 101) {
tags.add(Tag.newInstance(tag));
} else {
kws.add(tag);
}
});
out.setTags(tags);
if (kws.size() > 0) {
kws.forEach(k -> externals.add(KeyValue.newInstance("keyword", k)));
}
out.setExtras(externals);
}
if (out == null)
map.get("void_records").add(1);
map.get("dumped_records").add(1);
return out;
}
public static String tagReplacements(String tag) {
return tag
.replace("&", " and ")
// .replace(" ", "_")
.replace("(", "_")
.replace(")", "_")
.replace("/", "_")
.replace("\\", "_")
.replace("[", "")
.replace("]", "")
.trim();
}
public static String textReplacement(String text) {
// normalise typographic double quotes to plain ASCII quotes
return text
.replace("\u201C", "\"").replace("\u201D", "\"")
.replace("\u201E", "\"").replace("\u201F", "\"");
}
private static String getAuthor(Author v) {
String author = v.getFullname();
Optional<List<StructuredProperty>> oPid = Optional.ofNullable(v.getPid());
if (oPid.isPresent()) {
List<String> oList = oPid
.get()
.stream()
.filter(
p -> p
.getQualifier()
.getClassid()
.equalsIgnoreCase("orcid"))
.map(o -> o.getValue())
.collect(Collectors.toList());
if (oList.size() > 0)
author += ", " + oList.get(0);
}
return author;
}
private static String getListOfValues(Set<String> cfSet) {
StringWriter sw = new StringWriter();
cfSet.forEach(value -> sw.append(value + "; "));
return sw.toString().length() > 0 ? sw.toString().substring(0, sw.toString().length() - 2) : "";
}
private static void getDescription(CatalogueEntry out, List<KeyValue> externals, List<Field<String>> value) {
Iterator<Field<String>> it = value.iterator();
if (it.hasNext()) {
out.setNotes(textReplacement(it.next().getValue()));
} else {
out.setNotes("");
}
it.forEachRemaining(v -> externals.add(KeyValue.newInstance("Description", v.getValue())));
}
}
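A quick illustration (not part of this diff) of the two string helpers above; the second call assumes the typographic-quote characters as reconstructed in textReplacement.
String tag = Mapper.tagReplacements("nuts3/nuts2 [2016]");     // -> "nuts3_nuts2 2016"
String txt = Mapper.textReplacement("\u201Copen\u201D data");  // -> "\"open\" data"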

@@ -0,0 +1,97 @@
package eu.dnetlib.dhp.oa.graph.dump.gcat;
import java.io.*;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class SendToCatalogue implements Serializable {
private static final Log log = LogFactory.getLog(SendToCatalogue.class);
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
SendToCatalogue.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/gcat/catalogue_parameters.json")));
parser.parseArgument(args);
final String access_token = parser.get("gcattoken");
final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("hdfsNameNode");
final String gcatBaseUrl = parser.get("gcatBaseUrl");
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
.listFiles(
new Path(hdfsPath), true);
GCatAPIClient gCatAPIClient = new GCatAPIClient();
gCatAPIClient.setApplicationToken(access_token);
gCatAPIClient.setGcatBaseURL(gcatBaseUrl);
int purged = gCatAPIClient.purgeAll();
log.info("purged: " + purged);
while (fileStatusListIterator.hasNext()) {
LocatedFileStatus fileStatus = fileStatusListIterator.next();
Path p = fileStatus.getPath();
String p_string = p.toString();
if (!p_string.endsWith("_SUCCESS")) {
// String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
// String name = tmp.substring(tmp.lastIndexOf("/") + 1);
// log.info("Copying information for : " + name);
// fileSystem.copyToLocalFile(p, new Path("/tmp/" + name));
// try {
// InputStream in = new GZIPInputStream(new FileInputStream("/tmp/" + name));
// BufferedReader reader = new BufferedReader(
// new InputStreamReader(in));
FSDataInputStream in = fileSystem.open(p);
GZIPInputStream gis = new GZIPInputStream(in);
BufferedReader reader = new BufferedReader(new InputStreamReader(gis));
String line;
while ((line = reader.readLine()) != null) {
try {
gCatAPIClient.publish(line);
} catch (Exception e) {
log.error("ERROR_FOR " + line);
}
}
reader.close();
// in.close();
// } finally {
// log.info("deleting information for: " + name);
// File f = new File("/tmp/" + name);
// if (f.exists()) {
// f.delete();
// }
}
}
}
}
//}

@@ -0,0 +1,148 @@
package eu.dnetlib.dhp.oa.graph.dump.gcat;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result;
public class SparkDumpRISISCatalogue implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkDumpRISISCatalogue.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkDumpRISISCatalogue.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/gcat/dump_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String communityName = parser.get("communityName");
log.info("communityName: {}", communityName);
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
Map<String, LongAccumulator> map = new HashMap<>();
map.put("dumped_records", spark.sparkContext().longAccumulator("dumped_records"));
map.put("send_to_dump_records", spark.sparkContext().longAccumulator("send_to_dump_records"));
map.put("skipped_records", spark.sparkContext().longAccumulator("skipped_records"));
map.put("void_records", spark.sparkContext().longAccumulator("void_records"));
execDump(
spark, inputPath, outputPath, inputClazz, communityName, map);// ,
// dumpClazz);
log.info("records send to dump: {}", map.get("send_to_dump_records").value());
log.info("skipped records : {}", map.get("skipped_records").value());
log.info("dumped_records : {}", map.get("dumped_records").value());
});
}
public static <I extends Result, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> void execDump(SparkSession spark,
String inputPath,
String outputPath,
Class<I> inputClazz,
String communityName,
Map<String, LongAccumulator> map) {// Class<O> dumpClazz) {
// Set<String> communities = communityMap.keySet();
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<I, CatalogueEntry>) value -> execMap(
value, communityName, map),
Encoders.bean(CatalogueEntry.class))
.filter(Objects::nonNull)
.coalesce(1)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
}
private static <I extends Result> CatalogueEntry execMap(I value, String community,
Map<String, LongAccumulator> map) {
if (value.getDataInfo().getDeletedbyinference() || value.getDataInfo().getInvisible()) {
// map.get("skipped_records").add(1);
return null;
}
Optional<List<Context>> inputContext = Optional.ofNullable(value.getContext());
if (!inputContext.isPresent()) {
map.get("skipped_records").add(1);
return null;
}
if (inputContext.get().stream().map(c -> {
String id = c.getId();
if (id.contains("::")) {
return id.substring(0, id.indexOf("::"));
}
return id;
}).collect(Collectors.toList()).contains(community)) {
map.get("send_to_dump_records").add(1);
return Mapper.map(value, map);
}
map.get("skipped_records").add(1);
return null;
// List<String> toDumpFor = inputContext.get().stream().map(c -> {
// String id = c.getId();
// if (id.contains("::")) {
// id = id.substring(0, id.indexOf("::"));
// }
// if (community.equals(id)) {
// dumpedRecords.add(1);
// return id;
// }
// return null;
// }).filter(Objects::nonNull).collect(Collectors.toList());
// if (toDumpFor.size() == 0) {
// skippedRecords.add(1);
// return null;
// }
// return Mapper.map(value);
}
}
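For reference, a sketch (not part of this diff) of driving the job directly through its main method, mirroring the arguments used by DumpJobTest below; the paths are illustrative.
SparkDumpRISISCatalogue.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", "/tmp/graph/software",       // illustrative input path
"-outputPath", "/tmp/catalogue/software",   // illustrative output path
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-communityName", "risis"
});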

@@ -0,0 +1,62 @@
[
{
"name": "open",
"title": "Open Access",
"description": "Open access refers to a resource that is immediately and permanently online, and free for all on the Web, without financial and technical barriers.The resource is either stored in the repository or referenced to an external journal or trustworthy archive.",
"image_url": "https://creativecommons.org/wp-content/uploads/2016/05/open-access-logo.png"
},
{
"name": "closed",
"title": "Metadata-only Access",
"description": "Metadata only access refers to a resource in which access is limited to metadata only. The resource itself is described by the metadata, but neither is directly available through the system or platform nor can be referenced to an open access copy in an external journal or trustworthy archive.",
"image_url": "https://upload.wikimedia.org/wikipedia/commons/0/0e/Closed_Access_logo_transparent.svg"
},
{
"name": "restricted",
"title": "Restricted Access",
"description": "Restricted access refers to a resource that is available in a system but with some type of restriction for full open access. This type of access can occur in a number of different situations. Some examples are described below: The user must log-in to the system in order to access the resource The user must send an email to the author or system administrator to access the resource Access to the resource is restricted to a specific community (e.g. limited to a university community)",
"image_url": "https://upload.wikimedia.org/wikipedia/commons/3/3d/Mixed_Access_logo_PLoS_transparent.svg"
},
{
"name": "embargoed",
"title": "Under embargo",
"description": "Embargoed access refers to a resource that is metadata only access until released for open access on a certain date. Embargoes can be required by publishers and funders policies, or set by the author (e.g such as in the case of theses and dissertations).",
"image_url": "https://upload.wikimedia.org/wikipedia/commons/1/16/Lock-green-clock.svg"
},
{
"name": "datacite",
"title": "Datacite",
"description": "Research results available via Datacite, a leading global non-profit organisation that provides persistent identifiers (DOIs) for research data and other research outputs.",
"image_url":"https://assets.datacite.org/images/logo-big.png"
},
{
"name": "zenodo",
"title": "ZENODO",
"description": "Research results available via Zenodo. Zenodo is a general purpose repository that enables researchers, scientists, projects and institutions to share, preserve and showcase multidisciplinary research results (data, software and publications) that are not part of the existing institutional or subject-based repositories of the research communities. It is founded in the trustworthy CERN data centre.",
"image_url":"https://about.zenodo.org/static/img/logos/zenodo-gradient-1000.png"
},
{
"name": "hal",
"title": "Hyper Article en Ligne",
"description": "Research results available via Hyper Article en Ligne (HAL). HAL is an open archive where authors can deposit scholarly documents from all academic fields.",
"image_url":"https://hal.archives-ouvertes.fr/public/hal.logo.png"
},
{
"name": "figshare",
"title": "figshare",
"description": "Research results available via figshare, a repository where users can make all of their research outputs available in a citable, shareable and discoverable manner",
"image_url":"https://website-p-eu.figstatic.com/assets/776d94c0a5a92799ce5536fd94e8e3c2d759a3c2/public/global/images/full-logo.png"
},
{
"name": "digital.csic",
"title": "DIGITAL.CSIC",
"description": "Research results available via DIGITAL.CSIC, the institutional repository of the Spanish National Research Council",
"image_url":"https://digital.csic.es/imagenes/logo_DC_peque.png"
},
{
"name": "dans",
"title": "DANS - Data Archiving and Networked Services",
"description": "Research results available via DANS, the Netherlands institute for permanent access to digital research resources",
"image_url":"https://easy.dans.knaw.nl/ui/images/lay-out/logo_dans.png"
}
]

@@ -0,0 +1,30 @@
[
{
"paramName":"gct",
"paramLongName":"gcattoken",
"paramDescription": "the token for the deposition on the catalogue",
"paramRequired": true
},
{
"paramName": "p",
"paramLongName": "hdfsPath",
"paramDescription": "the path where storing the sequential file",
"paramRequired": true
},
{
"paramName": "nn",
"paramLongName": "hdfsNameNode",
"paramDescription": "the name node on hdfs",
"paramRequired": true
},
{
"paramName": "gbu",
"paramLongName": "gcatBaseUrl",
"paramDescription": "the base url for the catalogue",
"paramRequired": true
}
]

@@ -0,0 +1,38 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"cm",
"paramLongName":"communityName",
"paramDescription": "the name of the community for which to execute the dump to the catalogue",
"paramRequired": true
}
]

@@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

@@ -0,0 +1,230 @@
<workflow-app name="dump_community_products_to_catalogue" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>communityName</name>
<description>The name of the community for which to execute the dump to the catalogue</description>
</property>
<property>
<name>gcattoken</name>
<description>the access token for the deposition on the catalogue</description>
</property>
<property>
<name>gcatBaseUrl</name>
<description>the baseUrl to access the catalogue</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
</property>
<property>
<name>hiveJdbcUrl</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="reset_outputpath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="reset_outputpath">
<fs>
<delete path="${workingDir}"/>
<mkdir path="${workingDir}"/>
</fs>
<ok to="fork_dump"/>
<error to="Kill"/>
</action>
<fork name="fork_dump">
<path start="dump_publication"/>
<path start="dump_dataset"/>
<path start="dump_orp"/>
<path start="dump_software"/>
</fork>
<action name="dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication for RISIS related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset for RISIS related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table other for RISIS related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software for RISIS related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="populate_catalogue"/>
<action name="populate_catalogue">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.gcat.SendToCatalogue</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}</arg>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--gcattoken</arg><arg>${gcattoken}</arg>
<arg>--gcatBaseUrl</arg><arg>${gcatBaseUrl}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

@@ -0,0 +1,128 @@
package eu.dnetlib.dhp.oa.graph.gcat;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue;
public class DumpJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class);
private static HashMap<String, String> map = new HashMap<>();
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(DumpJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(DumpJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void testSoftware() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/software.json")
.getPath();
SparkDumpRISISCatalogue.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/result",
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-communityName", "risis"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));
org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));
Assertions.assertEquals(3, verificationDataset.count());
verificationDataset.show(false);
// verificationDataset.select("instance.type").show(false);
}
@Test
public void testDataset() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/dataset_for_dump.json")
.getPath();
SparkDumpRISISCatalogue.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/result",
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-communityName", "science-innovation-policy"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));
org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));
Assertions.assertEquals(2, verificationDataset.count());
verificationDataset.show(false);
// verificationDataset.select("instance.type").show(false);
}
}

@@ -0,0 +1,231 @@
package eu.dnetlib.dhp.oa.graph.gcat;
import java.io.*;
import java.net.URISyntaxException;
import java.util.List;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpStatus;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.oa.graph.dump.gcat.GCatAPIClient;
/**
* NEVER EVER ENABLE THIS CLASS UNLESS YOU ABSOLUTELY KNOW WHAT YOU ARE DOING: with the proper parameters set it can
* drop a D4Science Catalogue
*/
@Disabled
public class GCatAPIClientTest {
private static GCatAPIClient client;
@BeforeAll
public static void setup() {
client = new GCatAPIClient();
client.setApplicationToken("816486a3-60a9-4ecc-a7e0-a96740a90207-843339462");
client.setGcatBaseURL("https://gcat.d4science.org/gcat/");
}
@Test
public void testList() throws IOException, URISyntaxException {
System.out.println(client.list(0, 10));
}
@Test
public void testPublishAndPurge() throws IOException, URISyntaxException {
// The 'name' must be between 2 and 100 characters long and contain only lowercase alphanumeric characters, '-'
// and '_'.
// You can validate your name using the regular expression : ^[a-z0-9_\\-]{2,100}$
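// e.g. the "fake" identifier used below matches ^[a-z0-9_\\-]{2,100}$, so it is a valid catalogue name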
String objidentifier = "fake";
String json = IOUtils
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_dat.json"));
System.out.println("Creating item...");
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
System.out.println("item created, now listing...");
Assertions.assertEquals(1, client.list(0, 10).size());
// and then drop it
Assertions.assertTrue(client.purge(objidentifier));
System.out.println("item purged");
}
@Test
public void testPublish() throws IOException, URISyntaxException {
// The 'name' must be between 2 and 100 characters long and contain only lowercase alphanumeric characters, '-'
// and '_'.
// You can validate your name using the regular expression : ^[a-z0-9_\\-]{2,100}$
String json = IOUtils
.toString(
getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_software_20201130.json"));
System.out.println("Creating item...");
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
System.out.println("item created, now listing...");
// Assertions.assertEquals(1, client.list(0, 10).size());
}
@Test
public void bulkPublishORP() throws IOException, URISyntaxException {
BufferedReader reader = new BufferedReader(new FileReader(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_other.json")
.getPath()));
String line;
while ((line = reader.readLine()) != null) {
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
}
}
@Test
public void bulkPublishDATS() throws IOException, URISyntaxException {
BufferedReader reader = new BufferedReader(new FileReader(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/dats_20201126")
.getPath()));
String line;
int count = 1;
while ((line = reader.readLine()) != null) {
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
System.out.println(count);
count++;
}
}
@Test
public void bulkPublishCompressedSW() throws IOException, URISyntaxException {
BufferedReader reader = new BufferedReader(
new InputStreamReader(new GZIPInputStream(new FileInputStream(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/software_20201130.gz")
.getPath()))));
String line;
int count = 1;
while ((line = reader.readLine()) != null) {
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
System.out.println(count);
count++;
}
}
@Test
public void bulkPublishPUBS() throws IOException, URISyntaxException {
BufferedReader reader = new BufferedReader(new FileReader(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_publications")
.getPath()));
String line;
while ((line = reader.readLine()) != null) {
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
}
}
@Test
public void purgeItem() throws IOException, URISyntaxException {
String objidentifier = "dedup_wf_001--10160b3eafcedeb0a384fc400fe1c3fa";
Assertions.assertTrue(client.purge(objidentifier));
System.out.println("item purged");
}
@Test
public void testPurgeUnexisting() throws IOException, URISyntaxException {
String id = "1234";
Assertions.assertFalse(client.purge(id));
}
@Test
public void testPurgeAllEmptyCat() throws IOException, URISyntaxException {
Assertions.assertEquals(179, client.purgeAll());
}
@Test
public void testPublishAndPurgeAll() throws IOException, URISyntaxException {
String json = IOUtils
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_pub.json"));
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
System.out.println("item created, now listing...");
Assertions.assertEquals(1, client.list(0, 10).size());
// and then drop all
Assertions.assertEquals(1, client.purgeAll());
}
@Test
public void purgeList() throws IOException, URISyntaxException {
List<String> toPurge = Lists.newArrayList();
toPurge.add("dedup_wf_001--f20bb2f571f1fdcb9a66bec850a8267e");
toPurge.add("od_______166--c5caa1b39d9c7998f0f7c37f948ea097");
toPurge.add("od______2659--d956cae2b4a87eeaae4291530dfc88cf");
toPurge.add("od______2659--50fdc84d38782630c8fe73ca66e4e1e9");
toPurge.add("od______2659--1bbce6c3a47aa79cfc2c2973842d0c2c");
toPurge.add("od______2659--037571fc3efb68d43ae8a4d8078ddd82");
toPurge.add("od______2659--7bd3645a599d5c9fd5eb0a5c87b79948");
toPurge.add("od______3379--50842bb0a03d644a6ed831c0a59d25f4");
toPurge.add("od_______177--50842bb0a03d644a6ed831c0a59d25f4");
toPurge.add("od______4325--d6ad4e4111afd06f69c1597a60f09cef");
toPurge.add("od______3379--d6ad4e4111afd06f69c1597a60f09cef");
toPurge.add("od_______177--d6ad4e4111afd06f69c1597a60f09cef");
toPurge.add("od______4325--50842bb0a03d644a6ed831c0a59d25f4");
toPurge.add("od______4325--b823dc448d06160da67ebdcd1a67c544");
toPurge.add("od_______177--b823dc448d06160da67ebdcd1a67c544");
toPurge.add("od______1106--7407d45261b901f936319762b30a66f0");
toPurge.add("od______3379--b823dc448d06160da67ebdcd1a67c544");
toPurge.add("od______4325--655f1b9517a0dd16efd05b572f66927b");
toPurge.add("od______3379--655f1b9517a0dd16efd05b572f66927b");
toPurge.add("od_______177--655f1b9517a0dd16efd05b572f66927b");
toPurge.add("od______3379--308718c4498f1c857d9dec8fc8412bed");
toPurge.add("od_______177--308718c4498f1c857d9dec8fc8412bed");
toPurge.add("od______4325--308718c4498f1c857d9dec8fc8412bed");
toPurge.add("od______4325--26025853fbcb01858d58e3c268d144ce");
toPurge.add("od_______177--26025853fbcb01858d58e3c268d144ce");
toPurge.add("od______3379--4af67cb057f92a8c276c3aae56980430");
toPurge.add("od_______177--4af67cb057f92a8c276c3aae56980430");
toPurge.add("od______4325--4af67cb057f92a8c276c3aae56980430");
toPurge.add("od______3379--1c384c7771d6d5ec0b2b14264d0af8cd");
toPurge.add("od_______177--1c384c7771d6d5ec0b2b14264d0af8cd");
toPurge.add("od______4325--1c384c7771d6d5ec0b2b14264d0af8cd");
toPurge.add("od_______177--15a65e2433929cc77ae7b10fd56c1e9e");
toPurge.add("od______4325--15a65e2433929cc77ae7b10fd56c1e9e");
toPurge.add("od______3379--15a65e2433929cc77ae7b10fd56c1e9e");
toPurge.add("od_______177--c5caa1b39d9c7998f0f7c37f948ea097");
toPurge.add("od______3379--c5caa1b39d9c7998f0f7c37f948ea097");
toPurge.add("od_______177--11df09fd7a9ad36f0de546ea991182ce");
toPurge.add("od______3379--11df09fd7a9ad36f0de546ea991182ce");
toPurge.add("od______4325--11df09fd7a9ad36f0de546ea991182ce");
toPurge.add("dedup_wf_001::2fb2a1e02bfae184789181009d59232c");
toPurge.add("od_______177--39688c4bc9fd811e55f914e58701536d");
toPurge.add("od______3379--39688c4bc9fd811e55f914e58701536d");
toPurge.add("od______4325--39688c4bc9fd811e55f914e58701536d");
toPurge.add("od______4325--200c9c70dff2e86dad6ba555381027fc");
toPurge.add("od______3379--200c9c70dff2e86dad6ba555381027fc");
toPurge.add("od_______177--200c9c70dff2e86dad6ba555381027fc");
toPurge.add("od______3379--cf54f6149e7427d77dd37fccc3a0c747");
toPurge.add("od______3379--1f50518c04e2c0966425a350def1f82a");
toPurge.add("od______4325--1f50518c04e2c0966425a350def1f82a");
toPurge.add("od_______177--1f50518c04e2c0966425a350def1f82a");
toPurge.add("od______3379--bff6c5b35457f7f51d52d41323226663");
toPurge.add("od_______177--bff6c5b35457f7f51d52d41323226663");
toPurge.add("od______4325--bff6c5b35457f7f51d52d41323226663");
toPurge.add("od______2659--cbdcbe3865868c15680d95c7f83c3ff7");
toPurge.add("dedup_wf_001--569da4f719f51eb07f23548000e57d72");
toPurge.add("od_______177--bf88e4876d9c5a9720ca3fefe3ce93ea");
toPurge.add("od______3379--bf88e4876d9c5a9720ca3fefe3ce93ea");
toPurge.add("od______4325--bf88e4876d9c5a9720ca3fefe3ce93ea");
for (String name : toPurge) {
System.out.println("Purging " + name);
if (client.purge(name)) {
System.out.println("Purged");
} else {
System.out.println("Failed");
}
}
}
}

File diff suppressed because one or more lines are too long

@@ -0,0 +1,2 @@
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Lepori, Benedetto"},{"key":"Risis2_Attribution:Author","value":"Guerini, Massimilano"},{"key":"AccessMode:Access Right","value":"Open Access"},{"key":"Risis2_Attribution:Contributor","value":"European Commission"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"English"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.3752861"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.3752860"},{"key":"Risis2_Publishing:Publication Date","value":"2020-04-15"},{"key":"Risis2_Publishing:Publisher","value":"Zenodo"},{"key":"Risis2_Publishing:Collected From","value":"Zenodo; ZENODO; Datacite"},{"key":"Risis2_Publishing:Hosted By","value":"Zenodo; ZENODO"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.3752861"},{"key":"Identity:URL","value":"https://zenodo.org/record/3752861"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.3752860"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Dataset"}],"groups":[{"name":"open"},{"name":"zenodo"},{"name":"datacite"}],"license_id":"notspecified","name":"dedup_wf_001--c4634a42d4b98e594e0796a41b47ec61","notes":"<p>This file provides the correspondence table between EUROSTAT NUTS3 classification and the adapted regional classification used by the RISIS-KNOWMAK project. This regional classification fits the structure of knowledge production in Europe and addresses some knowm problems of the NUTS3 classification, such as the treatment of large agglomerations, while remaining fully compatible with the EUROSTAT NUTS regional classification. This compatibility allows combining all KNOWMAK data with regional statistics (at NUTS3 level, 2016 edition) from EUROSTAT.</p>\n\n<p>More precisely, the classification includes EUROSTAT metropolitan regions (based on the aggregation of NUTS3-level regions) and NUTS2 regions for the remaining areas; further, a few additional centers for knowledge production, like Oxford and Leuven, have been singled out at NUTS3 level. The resulting classification is therefore more fine-grained than NUTS2 in the areas with sizeable knowledge production, but at the same time recognizes the central role of metropolitan areas in knowledge production. While remaining compatible with NUTS, the classification allows addressing two well-known shortcomings: a) the fact that some large cities are split between NUTS regions (London) and b) the fact that NUTS3 classification in some countries includes many very small regions, as in the case of Germany</p>","tags":[],"title":"RISIS-KNOWMAK NUTS adapted classification","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::c4634a42d4b98e594e0796a41b47ec61","version":""}
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Laredo, Philippe, 0000-0002-5014-9132"},{"key":"AccessMode:Access Right","value":"Open Access"},{"key":"Risis2_Attribution:Contributor","value":"European Commission"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"English"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.2560116"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.2560117"},{"key":"Risis2_Publishing:Publication Date","value":"2019-02-08"},{"key":"Risis2_Publishing:Publisher","value":"Zenodo"},{"key":"Risis2_Publishing:Collected From","value":"ZENODO; Datacite; figshare"},{"key":"Risis2_Publishing:Hosted By","value":"Zenodo; ZENODO; figshare"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.2560117"},{"key":"Identity:URL","value":"https://zenodo.org/record/2560117"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.2560116"},{"key":"Identity:URL","value":"https://figshare.com/articles/Introduction_of_RISIS_project_by_Philippe_Laredo/7699286"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Audiovisual"}],"groups":[{"name":"open"},{"name":"zenodo"},{"name":"figshare"},{"name":"datacite"}],"license_id":"notspecified","name":"dedup_wf_001--10160b3eafcedeb0a384fc400fe1c3fa","notes":"<p>Introduction of RISIS project by Philippe Laredo</p>","tags":[],"title":"Introduction of RISIS project by Philippe Laredo","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::10160b3eafcedeb0a384fc400fe1c3fa","version":"None"}

@@ -0,0 +1 @@
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Akol, Angela, 0000-0003-4594-3478"},{"key":"Risis2_Attribution:Author","value":"Moland, Karen"},{"key":"Risis2_Attribution:Author","value":"Babirye, Juliet"},{"key":"Risis2_Attribution:Author","value":"Engebretsen, Ingunn, 0000-0001-5852-3611"},{"key":"AccessMode:Access Right","value":"not available"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"Undetermined"},{"key":"Identity:PID","value":"https://www.doi.org/10.6084/m9.figshare.c.4064003"},{"key":"Identity:PID","value":"https://www.doi.org/10.6084/m9.figshare.c.4064003.v1"},{"key":"Risis2_Publishing:Publication Date","value":"2018-04-10"},{"key":"Risis2_Publishing:Publisher","value":"Figshare"},{"key":"Risis2_Publishing:Collected From","value":"Datacite"},{"key":"Risis2_Publishing:Hosted By","value":"figshare"},{"key":"Identity:URL","value":"https://dx.doi.org/10.6084/m9.figshare.c.4064003.v1"},{"key":"Identity:URL","value":"https://dx.doi.org/10.6084/m9.figshare.c.4064003"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Dataset"},{"key":"keyword","value":"FOS: Biological sciences"}],"groups":[{"name":"figshare"},{"name":"datacite"}],"license_id":"notspecified","name":"dedup_wf_001--7151b1070802f6ed0ced85a5b175b368","notes":"Abstract Background Early identification and management of mental illness in childhood and adolescence helps to avert debilitating mental illness in adulthood but the attention given to Child and Adolescent Mental Health (CAMH) has until recently been low. Traditional healers are often consulted by patients with mental illness and in Uganda, up to 60% of patients attending traditional healers have moderate to severe mental illness. Poor access to CAMH care in Uganda creates a treatment gap that could be met through enhanced collaboration between traditional healers and biomedical health systems. The aim of this study was to explore traditional healers views on their collaboration with biomedical health systems so as to inform the implementation of strategies to improve access to CAMH services in Uganda. Methods In-depth interviews with 20 purposively selected traditional healers were conducted in November 2015. A semi-structured interview guide was used to explore: 1) The experiences of traditional healers with mental ill-health in children and adolescents; 2) their willingness to collaborate with the formal health system; and 3) their perception of clinicians willingness to collaborate with them. Interviews were conducted in local languages and tape recorded. Data were analysed using thematic analysis. Results Traditional healers described several experiences managing children and adolescents with mental illness, which they ascribed to spiritual and physical causes. The spiritual explanations were a consequence of unhappy ancestral spirits, modern religions and witchcraft, while physical causes mentioned included substance abuse and fevers. No traditional healer had received a patient referred to them from a medical clinic although all had referred patients to clinics for non-mental health reasons. Traditional healers expressed distrust in biomedical health systems and believed their treatments were superior to medical therapies in alleviating mental suffering. They expressed willingness to collaborate with biomedical providers. However, traditional healers believe clinicians disregard them and would not be willing to collaborate with them. Conclusion Potential for collaboration between traditional healers and biomedical health systems for improving access to CAMH services in Uganda exists, but is undermined by mutual mistrust and competition between traditional healers and clinicians.","tags":[{"name":"Medicine"},{"name":"Biotechnology"},{"name":"69999 Biological Sciences not elsewhere classified"},{"name":"mesheuropmc.education"},{"name":"Cancer"},{"name":"Science Policy"}],"title":"“We are like co-wives”: Traditional healers' views on collaborating with the formal Child and Adolescent Mental Health System in Uganda","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::7151b1070802f6ed0ced85a5b175b368","version":""}

File diff suppressed because one or more lines are too long

@@ -0,0 +1,68 @@
{
  "name": "nstest--test",
  "private": false,
  "license_id": "notspecified",
  "version": "",
  "title": "The role of R&D networks for exploitative and explorative regional knowledge creation",
  "notes": "<p>The beneficial effect of R&amp;D networks on regional knowledge creation is widely undenied. They constitute essential means to create new knowledge through collaborative research efforts and enable access to new knowledge by bridging the way to region-external knowledge bases. However, we argue that the significance and strength of the effect differs for different modes of knowledge creation &ndash; exploitative and explorative &ndash; as well as for the quantity and quality of knowledge created. To explore these differences, we estimate a set of spatial autoregressive (SAR) models for European regions with varying network effects that are based on a region&rsquo;s network centrality in the cross-region R&amp;D network of the EU Framework Programme (FP). The results point consistently to a higher positive impact of reginal network centralities on explorative than exploitative knowledge creation. Moreover, the quantity and quality of newly created knowledge is found to be conversely affected by the regional network centralities considered. Interestingly, a high number of links (degree centrality) has in relative terms higher positive effects on the quality, rather than the pure quantity of knowledge outputs, while an authoritative network position is more conducive for increasing the quantity than the quality of knowledge.</p>",
  "url": "https://beta.risis.openaire.eu/search/publication?articleId=od______2659::155332689ed5defb5d9a68a42fd8cd14",
  "maintainer": "",
  "extras": [
    {
      "key": "Publisher",
      "value": "Zenodo"
    },
    {
      "key": "Access right",
      "value": "Open Access"
    },
    {
      "key": "Collected from",
      "value": "ZENODO"
    },
    {
      "key": "PID",
      "value": "doi:10.5281/zenodo.3724562"
    },
    {
      "key": "Author",
      "value": "Neuländtner, Martina"
    },
    {
      "key": "Author",
      "value": "Scherngell, Thomas"
    },
    {
      "key": "Type",
      "value": "publication"
    },
    {
      "key": "Language",
      "value": "Undetermined"
    },
    {
      "key": "Country",
      "value": ""
    },
    {
      "key": "Subject",
      "value": "R&D networks, modes of knowledge creation, exploitation, exploration, spatial autoregressive model"
    },
    {
      "key": "Publication date",
      "value": "2020-01-01"
    },
    {
      "key": "Resource type",
      "value": ""
    },
    {
      "key": "URL",
      "value": "http://dx.doi.org/10.5281/zenodo.3724562"
    },
    {
      "key": "Hosted by",
      "value": "ZENODO"
    }
  ]
}

File diff suppressed because one or more lines are too long

@@ -0,0 +1,92 @@
{
  "extras": [
    {
      "key": "system:type",
      "value": "dataset"
    },
    {
      "key": "Risis2_Attribution:Author",
      "value": "Laredo, Philippe, 0000-0002-5014-9132"
    },
    {
      "key": "AccessMode:Access Right",
      "value": "Open Access"
    },
    {
      "key": "Risis2_Attribution:Contributor",
      "value": "European Commission"
    },
    {
      "key": "AccessMode:Embargo End Date",
      "value": ""
    },
    {
      "key": "Language",
      "value": "English"
    },
    {
      "key": "Identity:PID",
      "value": "https://www.doi.org/10.5281/zenodo.2560116"
    },
    {
      "key": "Identity:PID",
      "value": "https://www.doi.org/10.5281/zenodo.2560117"
    },
    {
      "key": "Risis2_Publishing:Publication Date",
      "value": "2019-02-08"
    },
    {
      "key": "Risis2_Publishing:Publisher",
      "value": "Zenodo"
    },
    {
      "key": "Risis2_Publishing:Collected From",
      "value": "ZENODO; Datacite; figshare"
    },
    {
      "key": "Risis2_Publishing:Hosted By",
      "value": "Zenodo; ZENODO; figshare"
    },
    {
      "key": "Identity:URL",
      "value": "http://dx.doi.org/10.5281/zenodo.2560117"
    },
    {
      "key": "Identity:URL",
      "value": "https://zenodo.org/record/2560117"
    },
    {
      "key": "Identity:URL",
      "value": "http://dx.doi.org/10.5281/zenodo.2560116"
    },
    {
      "key": "Identity:URL",
      "value": "https://figshare.com/articles/Introduction_of_RISIS_project_by_Philippe_Laredo/7699286"
    },
    {
      "key": "Country",
      "value": ""
    },
    {
      "key": "Format",
      "value": ""
    },
    {
      "key": "Resource Type",
      "value": "Audiovisual"
    }
  ],
  "groups": [
    {"name": "open"},
    {"name": "zenodo"},
    {"name": "figshare"}
  ],
  "license_id": "notspecified",
  "name": "dedup_wf_001--10160b3eafcedeb0a384fc400fe1c3fa",
  "notes": "<p>Introduction of RISIS project by Philippe Laredo<\/p>",
  "tags": [],
  "title": "Introduction of RISIS project by Philippe Laredo",
  "url": "https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::10160b3eafcedeb0a384fc400fe1c3fa",
  "version": "None"
}

File diff suppressed because one or more lines are too long