forked from D-Net/dnet-hadoop
Compare commits
57 Commits
Author | SHA1 | Date |
---|---|---|
Miriam Baglioni | 10b60ede5d | |
Miriam Baglioni | 8b83d47bc1 | |
Miriam Baglioni | 5df74ebe9c | |
Miriam Baglioni | 24bbd30e80 | |
Miriam Baglioni | 9d5e6eaf3f | |
Miriam Baglioni | 5983f6ffc6 | |
Miriam Baglioni | 818665451f | |
Miriam Baglioni | 6b8e947bcf | |
Miriam Baglioni | f6b7c297a8 | |
Miriam Baglioni | 6962cd7c18 | |
Miriam Baglioni | 00018711d8 | |
Miriam Baglioni | 4e494f5152 | |
Miriam Baglioni | d77d213d7c | |
Miriam Baglioni | b1dad7959c | |
Miriam Baglioni | 7ddd8590d0 | |
Alessia Bardi | a9dc05d8c7 | |
Alessia Bardi | d0f9891355 | |
Alessia Bardi | 3600d39f96 | |
Alessia Bardi | 0c9539a301 | |
Alessia Bardi | 3d3178d4e8 | |
Miriam Baglioni | 1c212cb6b9 | |
Miriam Baglioni | a7b26d7f07 | |
Miriam Baglioni | 0d10e3bd22 | |
Miriam Baglioni | 2d380aea1d | |
Miriam Baglioni | 566a763175 | |
Miriam Baglioni | daa5d933e0 | |
Miriam Baglioni | e71e857e48 | |
Miriam Baglioni | 9864bff488 | |
Alessia Bardi | 2c3b92ff10 | |
Miriam Baglioni | 42ee1ef284 | |
Alessia Bardi | 474ae69df8 | |
Miriam Baglioni | 952a4a4482 | |
Miriam Baglioni | 563378ce3f | |
Miriam Baglioni | d6838e18e6 | |
Miriam Baglioni | de62582c28 | |
Alessia Bardi | 743d948d1c | |
Miriam Baglioni | a2aa3c5b67 | |
Alessia Bardi | fcabee9242 | |
Miriam Baglioni | 2d9811ac4c | |
Alessia Bardi | 71ef7d9e66 | |
Miriam Baglioni | 60a3206de5 | |
Miriam Baglioni | f12b1ede24 | |
Alessia Bardi | b762c28cb6 | |
Miriam Baglioni | 844948f3e0 | |
Miriam Baglioni | 33e2ebeaaa | |
Alessia Bardi | a27b93859e | |
Miriam Baglioni | 3da12be81f | |
Alessia Bardi | d9c07eb800 | |
Miriam Baglioni | 1566fd590e | |
Miriam Baglioni | 004bf225cb | |
Miriam Baglioni | e983d02c1c | |
Miriam Baglioni | b570f011d1 | |
Miriam Baglioni | d133368d2d | |
Miriam Baglioni | c84736fa56 | |
Miriam Baglioni | 25a7205549 | |
Miriam Baglioni | 06b03840bd | |
Alessia Bardi | ec19fcace0 | |
@@ -0,0 +1,92 @@

package eu.dnetlib.dhp.schema.dump.gcat;

import java.io.Serializable;
import java.util.List;

import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;

public class CatalogueEntry implements Serializable {

	private String name; // openaire id without "::", substituted with "$$"
	private String license_id; // default "notspecified"
	private String title; // title.maintitle
	private String notes; // description.value (the first description)
	private String url; // the url of the resource in the openaire dashboard
	private String version; // valid for datasets
	private List<Tag> tags; // subject and keywords
	private List<Group> groups; // access and publishers
	private List<KeyValue> extras;

	public List<Group> getGroups() {
		return groups;
	}

	public void setGroups(List<Group> groups) {
		this.groups = groups;
	}

	public List<Tag> getTags() {
		return tags;
	}

	public void setTags(List<Tag> tags) {
		this.tags = tags;
	}

	public String getVersion() {
		return version;
	}

	public void setVersion(String version) {
		this.version = version;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getLicense_id() {
		return license_id;
	}

	public void setLicense_id(String license_id) {
		this.license_id = license_id;
	}

	public String getTitle() {
		return title;
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public String getNotes() {
		return notes;
	}

	public void setNotes(String notes) {
		this.notes = notes;
	}

	public String getUrl() {
		return url;
	}

	public void setUrl(String url) {
		this.url = url;
	}

	public List<KeyValue> getExtras() {
		return extras;
	}

	public void setExtras(List<KeyValue> extras) {
		this.extras = extras;
	}
}
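The bean above is what gets serialized, one JSON object per line, and pushed to the catalogue. As a rough illustration only (every literal value below is invented, not taken from the commits), an entry could be assembled and serialized with Jackson's ObjectMapper, which the test class later in this changeset already uses:

	// Illustrative sketch, not part of the diff: building and serializing one entry.
	// All literal values are hypothetical; writeValueAsString throws JsonProcessingException.
	CatalogueEntry entry = new CatalogueEntry();
	entry.setName("datacite____--ab12cd34ef56"); // hypothetical openaire id with ":" replaced by "-"
	entry.setLicense_id("notspecified"); // Constants.DEFAULT_LICENCE_ID
	entry.setTitle("A sample dataset title");
	entry.setNotes("The first description of the record");
	entry.setUrl("https://science-innovation-policy.openaire.eu/search/dataset?datasetId=..."); // placeholder
	entry.setVersion("1.0");
	entry.setTags(java.util.Arrays.asList(Tag.newInstance("science policy")));
	entry.setGroups(java.util.Arrays.asList(Group.newInstance(null, "open")));
	entry.setExtras(java.util.Arrays.asList(KeyValue.newInstance("system:type", "dataset")));
	String json = new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsString(entry);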
@@ -0,0 +1,37 @@

package eu.dnetlib.dhp.schema.dump.gcat;

import java.io.Serializable;

public class Group implements Serializable {
	private String id;
	private String name;

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public static Group newInstance(String id, String name) {
		Group g = new Group();
		if (id != null) {
			g.id = id;
		}
		if (name != null) {
			g.name = name;
		}

		return g;
	}
}
@@ -0,0 +1,24 @@

package eu.dnetlib.dhp.schema.dump.gcat;

import java.io.Serializable;

import com.fasterxml.jackson.core.SerializableString;

public class Tag implements Serializable {
	private String name;

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public static Tag newInstance(String n) {
		Tag t = new Tag();
		t.name = n;
		return t;
	}
}
@@ -7,8 +7,14 @@ import com.google.common.collect.Maps;

public class Constants {

	public static String PUBLICATION_URL = "https://science-innovation-policy.openaire.eu/search/publication?articleId=";
	public static String DATASET_URL = "https://science-innovation-policy.openaire.eu/search/dataset?datasetId=";
	public static String SOFTWARE_URL = "https://science-innovation-policy.openaire.eu/search/software?softwareId=";
	public static String ORP_URL = "https://science-innovation-policy.openaire.eu/search/other?orpId=";
	public static String DEFAULT_LICENCE_ID = "notspecified";

	public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
	public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
	public static final Map<String, String> gcatCatalogue = Maps.newHashMap();

	public static final String INFERRED = "Inferred by OpenAIRE";

@@ -26,6 +32,8 @@ public class Constants {

	public static String ORCID = "orcid";

	public static String UNKNOWN = "unknown";

	static {
		accessRightsCoarMap.put("OPEN", "c_abf2");
		accessRightsCoarMap.put("RESTRICTED", "c_16ec");

@@ -41,6 +49,16 @@ public class Constants {
		coarCodeLabelMap.put("c_f1cf", "EMBARGO");
	}

	static {
		gcatCatalogue.put("OPEN", "OPEN");
		gcatCatalogue.put("RESTRICTED", "RESTRICTED");
		gcatCatalogue.put("OPEN SOURCE", "OPEN");
		gcatCatalogue.put("CLOSED", "CLOSED");
		gcatCatalogue.put("EMBARGO", "EMBARGO");
		gcatCatalogue.put("UNKNOWN", "UNKNOWN");
		gcatCatalogue.put("OTHER", "UNKNOWN");
	}

	public enum DUMPTYPE {
		COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder");

@@ -53,5 +71,6 @@ public class Constants {
		public String getType() {
			return type;
		}

	}
}
@@ -0,0 +1,177 @@

package eu.dnetlib.dhp.oa.graph.dump.gcat;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import com.google.gson.Gson;

import okhttp3.*;

/**
 * Created by Alessia Bardi on 19/06/2020.
 *
 * @author Alessia Bardi
 */
public class GCatAPIClient {

	private static final Log log = LogFactory.getLog(GCatAPIClient.class);

	public static final int BULK_SIZE = 100;
	private String gcatBaseURL;
	private final String itemPath = "items";
	private String applicationToken;

	private static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");

	public GCatAPIClient() {
	}

	/**
	 * Publishes the given json record as an item in the D4Science catalogue.
	 *
	 * @param jsonMetadata the whole json record to be published
	 * @return the HTTP status code of the request
	 * @throws IOException
	 */
	public int publish(final String jsonMetadata) throws IOException, URISyntaxException {
		OkHttpClient httpClient = new OkHttpClient();

		RequestBody body = RequestBody.create(jsonMetadata, MEDIA_TYPE_JSON);

		Request request = new Request.Builder()
			.url(getGcatBaseURL() + itemPath)
			.header("gcube-token", getApplicationToken())
			.addHeader("Content-Type", "application/json")
			.addHeader("Accept", "application/json")
			.post(body)
			.build();

		try (Response response = httpClient.newCall(request).execute()) {
			if (log.isDebugEnabled()) {
				log.debug(response.code());
				System.out.println(response.code());
				log.debug(response.body().string());
			}
			if (!response.isSuccessful()) {
				throw new IOException("Unexpected code " + response);
			}

			return response.code();
		}
	}

	/**
	 * Lists the items in the catalogue.
	 *
	 * @param offset offset
	 * @param limit limit
	 * @return list of catalogue item names
	 * @throws IOException
	 * @throws URISyntaxException
	 */
	public List<String> list(final int offset, final int limit) throws IOException, URISyntaxException {
		OkHttpClient httpClient = new OkHttpClient();

		Request request = new Request.Builder()
			.url(getGcatBaseURL() + itemPath + "?offset=" + offset + "&limit=" + limit)
			.header("gcube-token", getApplicationToken())
			.addHeader("Content-Type", "application/json")
			.addHeader("Accept", "application/json")
			.get()
			.build();

		try (Response response = httpClient.newCall(request).execute()) {
			int status = response.code();
			if (status >= 200 && status < 300) {
				String entity = response.body().string();
				return entity != null ? new Gson().fromJson(entity, List.class) : null;
			} else {
				throw new ClientProtocolException("Unexpected response status: " + status);
			}
		}
	}

	public boolean purge(final String resCatName) throws IOException, URISyntaxException {
		try (CloseableHttpClient client = HttpClients.createDefault()) {
			URIBuilder builder = new URIBuilder(getGcatBaseURL() + itemPath + "/" + resCatName)
				.addParameter("purge", "true");
			URI uri = builder.build();
			System.out.println(uri.toString());
			HttpDelete del = new HttpDelete(uri);
			del.setHeader("gcube-token", getApplicationToken());
			del.addHeader("Content-Type", "application/json");
			del.addHeader("Accept", "application/json");
			HttpResponse response = client.execute(del);
			if (log.isDebugEnabled()) {
				log.debug(response.getStatusLine());
			}
			return response.getStatusLine().getStatusCode() == HttpStatus.SC_NO_CONTENT;
		}
	}

	public int purgeAll() throws IOException, URISyntaxException {
		int count = 0;
		int deleted = 0;
		int failed = 0;
		List<String> list = list(0, BULK_SIZE);
		do {
			for (String itemName : list) {
				count++;
				if (purge(itemName))
					deleted++;
				else {
					failed++;
					log.warn("Deletion of item " + itemName + " failed");
				}
			}
			list = list(0, BULK_SIZE);
		} while (list.size() > 0);
		log.info(String.format("PurgeAll completed: total = %d; deleted = %d; failed = %d", count, deleted, failed));
		return deleted;
	}

	public String getGcatBaseURL() {
		return gcatBaseURL;
	}

	public void setGcatBaseURL(String gcatBaseURL) {
		this.gcatBaseURL = gcatBaseURL;
	}

	public String getApplicationToken() {
		return applicationToken;
	}

	public void setApplicationToken(String applicationToken) {
		this.applicationToken = applicationToken;
	}
}
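For orientation, the call sequence the SendToCatalogue class further down in this changeset performs against this client looks roughly like the sketch below; the base URL and token are placeholders, not values from the commits.

	// Illustrative sketch, not part of the diff: typical use of GCatAPIClient.
	GCatAPIClient client = new GCatAPIClient();
	client.setGcatBaseURL("https://gcat.example.org/catalogue/"); // hypothetical endpoint
	client.setApplicationToken("my-gcube-token"); // hypothetical gcube token

	int status = client.publish("{\"name\":\"some-item\",\"title\":\"A sample item\"}"); // one JSON record per call
	List<String> firstPage = client.list(0, GCatAPIClient.BULK_SIZE); // page through catalogue item names
	boolean removed = client.purge("some-item"); // permanently delete one item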
@@ -0,0 +1,403 @@

package eu.dnetlib.dhp.oa.graph.dump.gcat;

import java.io.Serializable;
import java.io.StringWriter;
import java.security.Key;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.avro.generic.GenericData;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.util.LongAccumulator;

import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry;
import eu.dnetlib.dhp.schema.dump.gcat.Group;
import eu.dnetlib.dhp.schema.dump.gcat.Tag;
import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import org.omg.CORBA.UNKNOWN;

public class Mapper implements Serializable {

	private static final List<String> publishers = Arrays
		.asList("zenodo", "hal", "figshare", "digital-csic", "dans", "datacite");
	private static final List<String> access = Arrays.asList("open", "closed", "embargoed", "restricted");

	public static <I extends eu.dnetlib.dhp.schema.oaf.Result> CatalogueEntry map(I input,
		Map<String, LongAccumulator> map) {

		final CatalogueEntry out = new CatalogueEntry();
		Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
		List<KeyValue> externals = new ArrayList<>();
		Set<String> urlSet = new HashSet<>();
		Set<String> cfSet = new HashSet<>();
		Set<String> hbSet = new HashSet<>();
		Set<String> countrySet = new HashSet<>();
		Set<String> groups = new HashSet<>();
		List<Group> groupList = new ArrayList<>();
		if (ort.isPresent()) {
			switch (ort.get().getClassid()) {
				case "publication":
					Optional<Journal> oJournal = Optional
						.ofNullable(((Publication) input).getJournal());
					if (oJournal.isPresent()) {
						Journal value = oJournal.get();
						externals
							.add(
								KeyValue
									.newInstance(
										"Risis2_Publishing:Journal",
										// "Journal",
										value.getName() + ", " + value.getVol() + ", " + value.getIss()));
					}
					out.setUrl(Constants.PUBLICATION_URL + input.getId().substring(3));
					externals.add(KeyValue.newInstance("system:type", "publication"));
					break;
				case "dataset":
					eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
					out
						.setVersion(
							Optional
								.ofNullable(id.getVersion())
								.map(v -> v.getValue())
								.orElse(""));
					out.setUrl(Constants.DATASET_URL + input.getId().substring(3));
					externals.add(KeyValue.newInstance("system:type", "dataset"));
					break;
				case "software":
					eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
					Optional
						.ofNullable(is.getCodeRepositoryUrl())
						.ifPresent(value -> urlSet.add(value.getValue()));
					Optional
						.ofNullable(is.getDocumentationUrl())
						.ifPresent(value -> value.forEach(v -> urlSet.add(v.getValue())));

					externals
						.add(
							KeyValue
								.newInstance(
									"Programming Language", Optional
										.ofNullable(is.getProgrammingLanguage())
										.map(v -> v.getClassname())
										.orElse("")));
					// .ifPresent(
					// value -> externals.add(KeyValue.newInstance("Programming Language", value.getClassname())));
					out.setUrl(Constants.SOFTWARE_URL + input.getId().substring(3));
					externals.add(KeyValue.newInstance("system:type", "software"));
					break;

				case "other":
					out.setUrl(Constants.ORP_URL + input.getId().substring(3));
					externals.add(KeyValue.newInstance("system:type", "other"));
					break;

			}

			out.setLicense_id(Constants.DEFAULT_LICENCE_ID);

			Optional<List<Author>> oauth = Optional
				.ofNullable(input.getAuthor());
			List<String> authList = new ArrayList<>();
			if (oauth.isPresent()) {
				authList = oauth.get().stream().map(v -> getAuthor(v)).collect(Collectors.toList());
			}
			if (authList.size() > 0) {
				authList.forEach(a -> externals.add(KeyValue.newInstance("Risis2_Attribution:Author", a)));
				// authList.forEach(a -> externals.add(KeyValue.newInstance("Author", a)));
			}

			String accessr = Optional
				.ofNullable(input.getBestaccessright())
				.map(
					value -> value.getClassid())
				.orElse("");
			if (access.contains(accessr.toLowerCase())) {
				groupList.add(Group.newInstance(null, accessr.toLowerCase().trim()));
			}
			if (!accessr.equals("")) {
				externals
					.add(
						KeyValue
							.newInstance(
								"AccessMode:Access Right", input.getBestaccessright().getClassname()));
			}

			Optional
				.ofNullable(input.getCollectedfrom())
				.ifPresent(
					value -> value
						.forEach(v -> cfSet.add(v.getValue())));

			Optional<List<Field<String>>> ocont = Optional
				.ofNullable(input.getContributor());
			if (ocont.isPresent()) {
				ocont
					.get()
					.forEach(
						v -> externals
							.add(
								KeyValue
									.newInstance(
										"Risis2_Attribution:Contributor",
										v.getValue())));
				// .forEach(v -> externals.add(KeyValue.newInstance("Contributor", v.getValue())));
			}

			Optional
				.ofNullable(input.getCountry())
				.ifPresent(
					value -> value
						.forEach(v -> countrySet.add(v.getClassname())));

			Optional
				.ofNullable(input.getDescription())
				.ifPresent(value -> getDescription(out, externals, value));

			externals
				.add(
					KeyValue
						.newInstance(
							"AccessMode:Embargo End Date", Optional
								.ofNullable(input.getEmbargoenddate())
								.map(value -> value.getValue())
								.orElse("")));

			final Set<String> formatSet = new HashSet<>();
			Optional
				.ofNullable(input.getFormat())
				.ifPresent(value -> value.forEach(f -> formatSet.add(f.getValue())));

			String id = input.getId();
			// id = id.substring(0, id.lastIndexOf(":") + 1) + "a" + id.substring(id.lastIndexOf(":") + 1);
			out.setName(id.substring(id.indexOf('|') + 1).replace(":", "-"));

			final Set<String> itSet = new HashSet<>();
			Optional
				.ofNullable(input.getInstance())
				.ifPresent(
					value -> value
						.forEach(v -> {

							Optional
								.ofNullable(v.getHostedby())
								.ifPresent(hb -> hbSet.add(hb.getValue()));

							Optional
								.ofNullable(v.getUrl())
								.ifPresent(u -> u.forEach(url -> urlSet.add(url)));

							Optional
								.ofNullable(v.getInstancetype())
								.ifPresent(it -> itSet.add(it.getClassname()));

						}));

			externals
				.add(
					KeyValue
						.newInstance(
							"Language", Optional
								.ofNullable(input.getLanguage())
								.map(value -> {
									String lang = value.getClassname();
									if (lang.toLowerCase().equals(Constants.UNKNOWN)) {
										return "";
									}
									return lang;
								})
								.orElse("")));

			List<StructuredProperty> iTitle = Optional
				.ofNullable(input.getTitle())
				.map(
					value -> value
						.stream()
						.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
						.collect(Collectors.toList()))
				.orElse(new ArrayList<>());

			if (iTitle.size() > 0) {
				out.setTitle(textReplacement(iTitle.get(0).getValue()));
			} else {
				out.setTitle("");
			}

			Optional
				.ofNullable(input.getPid())
				.ifPresent(
					value -> value
						.forEach(
							v -> {
								if (v.getQualifier().getClassid().equalsIgnoreCase("DOI")) {
									externals
										.add(
											KeyValue
												.newInstance("Identity:PID", "https://www.doi.org/" + v.getValue()));
								} else {
									externals
										.add(
											KeyValue
												.newInstance(
													"Identity:PID",
													v.getQualifier().getClassid() + ":" + v.getValue()));
								}

							}));

			externals
				.add(
					KeyValue
						.newInstance(
							"Risis2_Publishing:Publication Date", Optional
								// "Publication Date", Optional
								.ofNullable(input.getDateofacceptance())
								.map(value -> value.getValue())
								.orElse("")));

			String publisher = Optional
				.ofNullable(input.getPublisher())
				.map(value -> value.getValue())
				.orElse("");

			if (!publisher.equals("")) {
				groups.add(publisher.toLowerCase().replace(".", "-"));
				externals
					.add(
						KeyValue
							.newInstance(
								"Risis2_Publishing:Publisher", publisher));
			}

			Set<String> tagsSet = new HashSet<>();
			Optional
				.ofNullable(input.getSubject())
				.ifPresent(
					value -> value
						.forEach(
							s -> {
								String classId = s.getQualifier().getClassid();
								String prefix = "";
								if (!(classId.equals("keyword") || classId.toLowerCase().equals(Constants.UNKNOWN)) &&
									StringUtils.isNotEmpty(classId)) {
									prefix = classId + ".";
								}
								String tag = prefix + s.getValue();
								tag = tagReplacements(tag);
								tagsSet.add(tag);
							}));

			cfSet.remove("Unknown Repository");
			externals.add(KeyValue.newInstance("Risis2_Publishing:Collected From", getListOfValues(cfSet)));

			hbSet.remove("Unknown Repository");
			externals.add(KeyValue.newInstance("Risis2_Publishing:Hosted By", getListOfValues(hbSet)));

			cfSet.forEach(cf -> groups.add(cf.toLowerCase().replace(".", "-")));
			hbSet.forEach(hb -> groups.add(hb.toLowerCase().replace(".", "-")));

			groups.forEach(g -> {
				if (publishers.contains(g.trim())) {
					groupList.add(Group.newInstance(null, g.trim()));
				}
			});
			out.setGroups(groupList);
			urlSet.stream().forEach(url -> externals.add(KeyValue.newInstance("Identity:URL", url)));
			externals.add(KeyValue.newInstance("Country", getListOfValues(countrySet)));
			externals.add(KeyValue.newInstance("Format", getListOfValues(formatSet)));
			externals.add(KeyValue.newInstance("Resource Type", getListOfValues(itSet)));
			List<Tag> tags = new ArrayList<>();
			List<String> kws = new ArrayList<>();
			tagsSet.forEach(tag -> {
				if (tag.endsWith(",") || tag.endsWith(";") || tag.endsWith(".")) {
					tag = tag.substring(0, tag.length() - 1);
				}
				if (tag.matches("^[a-zA-Z0-9_. -]*$") && tag.length() > 1 && tag.length() < 101) {
					tags.add(Tag.newInstance(tag));
				} else {
					kws.add(tag);
				}

			});

			out.setTags(tags);
			if (kws.size() > 0) {
				kws.forEach(k -> externals.add(KeyValue.newInstance("keyword", k)));
			}
			out.setExtras(externals);

		}
		if (out == null)
			map.get("void_records").add(1);

		map.get("dumped_records").add(1);
		return out;
	}

	public static String tagReplacements(String tag) {
		return tag
			.replace("&", " and ")
			// .replace(" ", "_")
			.replace("(", "_")
			.replace(")", "_")
			.replace("/", "_")
			.replace("\\", "_")
			.replace("[", "")
			.replace("]", "")
			.trim();

	}

	public static String textReplacement(String text) {
		return text
			.replace("‘", "\"").replace("’", "\"")
			.replace("“", "\"").replace("”", "\"");
	}

	private static String getAuthor(Author v) {
		String author = v.getFullname();
		Optional<List<StructuredProperty>> oPid = Optional.ofNullable(v.getPid());
		if (oPid.isPresent()) {
			List<String> oList = oPid
				.get()
				.stream()
				.filter(
					p -> p
						.getQualifier()
						.getClassid()
						.equalsIgnoreCase("orcid"))
				.map(o -> o.getValue())
				.collect(Collectors.toList());
			if (oList.size() > 0)
				author += ", " + oList.get(0);
		}
		return author;

	}

	private static String getListOfValues(Set<String> cfSet) {
		StringWriter sw = new StringWriter();
		cfSet.forEach(value -> sw.append(value + "; "));

		return sw.toString().length() > 0 ? sw.toString().substring(0, sw.toString().length() - 2) : "";
	}

	private static void getDescription(CatalogueEntry out, List<KeyValue> externals, List<Field<String>> value) {
		Iterator<Field<String>> it = value.iterator();
		if (it.hasNext()) {
			out.setNotes(textReplacement(it.next().getValue()));
		} else {
			out.setNotes("");
		}
		it.forEachRemaining(v -> externals.add(KeyValue.newInstance("Description", v.getValue())));
	}

}
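Two string conventions in the mapper are easy to miss: the catalogue name is the OpenAIRE identifier with the "NN|" prefix dropped and ":" replaced by "-", and subject tags are sanitized by tagReplacements before being matched against the "^[a-zA-Z0-9_. -]*$" pattern (anything that still fails becomes a "keyword" extra). A small sketch with invented input values, only to illustrate the behaviour of the code above:

	// Illustrative sketch, not part of the diff: Mapper's string conventions on invented inputs.
	String openaireId = "50|doi_________::ab12cd34"; // hypothetical OpenAIRE id
	String catalogueName = openaireId
		.substring(openaireId.indexOf('|') + 1)
		.replace(":", "-"); // -> "doi_________--ab12cd34"

	String tag = Mapper.tagReplacements("science & technology (policy)"); // -> "science  and  technology _policy_"
	boolean usableAsTag = tag.matches("^[a-zA-Z0-9_. -]*$"); // true here; failing tags end up as "keyword" extras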
@@ -0,0 +1,97 @@

package eu.dnetlib.dhp.oa.graph.dump.gcat;

import java.io.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.http.HttpStatus;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class SendToCatalogue implements Serializable {

	private static final Log log = LogFactory.getLog(SendToCatalogue.class);

	public static void main(final String[] args) throws Exception {
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
			IOUtils
				.toString(
					SendToCatalogue.class
						.getResourceAsStream(
							"/eu/dnetlib/dhp/oa/graph/gcat/catalogue_parameters.json")));

		parser.parseArgument(args);

		final String access_token = parser.get("gcattoken");
		final String hdfsPath = parser.get("hdfsPath");
		final String hdfsNameNode = parser.get("hdfsNameNode");
		final String gcatBaseUrl = parser.get("gcatBaseUrl");

		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", hdfsNameNode);

		FileSystem fileSystem = FileSystem.get(conf);

		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
			.listFiles(
				new Path(hdfsPath), true);
		GCatAPIClient gCatAPIClient = new GCatAPIClient();
		gCatAPIClient.setApplicationToken(access_token);
		gCatAPIClient.setGcatBaseURL(gcatBaseUrl);
		int purged = gCatAPIClient.purgeAll();
		log.info("purged: " + purged);
		while (fileStatusListIterator.hasNext()) {
			LocatedFileStatus fileStatus = fileStatusListIterator.next();

			Path p = fileStatus.getPath();
			String p_string = p.toString();
			if (!p_string.endsWith("_SUCCESS")) {
				// String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
				// String name = tmp.substring(tmp.lastIndexOf("/") + 1);
				// log.info("Copying information for : " + name);
				// fileSystem.copyToLocalFile(p, new Path("/tmp/" + name));

				// try {
				// InputStream in = new GZIPInputStream(new FileInputStream("/tmp/" + name));

				// BufferedReader reader = new BufferedReader(
				// new InputStreamReader(in));
				FSDataInputStream in = fileSystem.open(p);

				GZIPInputStream gis = new GZIPInputStream(in);

				BufferedReader reader = new BufferedReader(new InputStreamReader(gis));
				String line;
				while ((line = reader.readLine()) != null) {
					try {
						gCatAPIClient.publish(line);
					} catch (Exception e) {
						log.error("ERROR_FOR " + line);
					}

				}
				reader.close();
				// in.close();

				// } finally {
				// log.info("deleting information for: " + name);
				// File f = new File("/tmp/" + name);
				// if (f.exists()) {
				// f.delete();
				// }

			}
		}

	}

}

//}
@@ -0,0 +1,148 @@

package eu.dnetlib.dhp.oa.graph.dump.gcat;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result;

public class SparkDumpRISISCatalogue implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkDumpRISISCatalogue.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkDumpRISISCatalogue.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/gcat/dump_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String communityName = parser.get("communityName");
		log.info("communityName: {}", communityName);

		Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				Map<String, LongAccumulator> map = new HashMap<>();
				map.put("dumped_records", spark.sparkContext().longAccumulator("dumped_records"));
				map.put("send_to_dump_records", spark.sparkContext().longAccumulator("send_to_dump_records"));
				map.put("skipped_records", spark.sparkContext().longAccumulator("skipped_records"));
				map.put("void_records", spark.sparkContext().longAccumulator("void_records"));
				execDump(
					spark, inputPath, outputPath, inputClazz, communityName, map);// ,
				// dumpClazz);
				log.info("records send to dump: {}", map.get("send_to_dump_records").value());
				log.info("skipped records : {}", map.get("skipped_records").value());
				log.info("dumped_records : {}", map.get("dumped_records").value());
			});

	}

	public static <I extends Result, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> void execDump(SparkSession spark,
		String inputPath,
		String outputPath,
		Class<I> inputClazz,
		String communityName,
		Map<String, LongAccumulator> map) {// Class<O> dumpClazz) {

		// Set<String> communities = communityMap.keySet();
		Utils
			.readPath(spark, inputPath, inputClazz)
			.map(
				(MapFunction<I, CatalogueEntry>) value -> execMap(
					value, communityName, map),
				Encoders.bean(CatalogueEntry.class))
			.filter(Objects::nonNull)
			.coalesce(1)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);

	}

	private static <I extends Result> CatalogueEntry execMap(I value, String community,
		Map<String, LongAccumulator> map) {

		if (value.getDataInfo().getDeletedbyinference() || value.getDataInfo().getInvisible()) {
			// map.get("skipped_records").add(1);
			return null;
		}
		Optional<List<Context>> inputContext = Optional.ofNullable(value.getContext());
		if (!inputContext.isPresent()) {
			map.get("skipped_records").add(1);
			return null;
		}
		if (inputContext.get().stream().map(c -> {
			String id = c.getId();
			if (id.contains("::")) {
				return id.substring(0, id.indexOf("::"));
			}
			return id;
		}).collect(Collectors.toList()).contains(community)) {
			map.get("send_to_dump_records").add(1);
			return Mapper.map(value, map);
		}
		map.get("skipped_records").add(1);
		return null;
		// List<String> toDumpFor = inputContext.get().stream().map(c -> {
		// String id = c.getId();
		// if (id.contains("::")) {
		// id = id.substring(0, id.indexOf("::"));
		// }
		// if (community.equals(id)) {
		// dumpedRecords.add(1);
		// return id;
		// }
		// return null;
		// }).filter(Objects::nonNull).collect(Collectors.toList());
		// if (toDumpFor.size() == 0) {
		// skippedRecords.add(1);
		// return null;
		// }
		// return Mapper.map(value);

	}

}
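The community filter in execMap keeps only records whose context identifier, truncated at the first "::", equals the requested community name. A minimal sketch of that truncation, with invented identifiers:

	// Illustrative sketch, not part of the diff: how execMap normalizes context ids before matching.
	String contextId = "risis::subcommunity"; // hypothetical context id on a record
	String community = contextId.contains("::")
		? contextId.substring(0, contextId.indexOf("::"))
		: contextId; // -> "risis"
	boolean dumped = community.equals("risis"); // the record is mapped only when this holds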
@@ -0,0 +1,62 @@

[
  {
    "name": "open",
    "title": "Open Access",
    "description": "Open access refers to a resource that is immediately and permanently online, and free for all on the Web, without financial and technical barriers. The resource is either stored in the repository or referenced to an external journal or trustworthy archive.",
    "image_url": "https://creativecommons.org/wp-content/uploads/2016/05/open-access-logo.png"
  },
  {
    "name": "closed",
    "title": "Metadata-only Access",
    "description": "Metadata only access refers to a resource in which access is limited to metadata only. The resource itself is described by the metadata, but neither is directly available through the system or platform nor can be referenced to an open access copy in an external journal or trustworthy archive.",
    "image_url": "https://upload.wikimedia.org/wikipedia/commons/0/0e/Closed_Access_logo_transparent.svg"
  },
  {
    "name": "restricted",
    "title": "Restricted Access",
    "description": "Restricted access refers to a resource that is available in a system but with some type of restriction for full open access. This type of access can occur in a number of different situations. Some examples are described below: the user must log in to the system in order to access the resource; the user must send an email to the author or system administrator to access the resource; access to the resource is restricted to a specific community (e.g. limited to a university community).",
    "image_url": "https://upload.wikimedia.org/wikipedia/commons/3/3d/Mixed_Access_logo_PLoS_transparent.svg"
  },
  {
    "name": "embargoed",
    "title": "Under embargo",
    "description": "Embargoed access refers to a resource that is metadata only access until released for open access on a certain date. Embargoes can be required by publishers and funders policies, or set by the author (e.g. in the case of theses and dissertations).",
    "image_url": "https://upload.wikimedia.org/wikipedia/commons/1/16/Lock-green-clock.svg"
  },
  {
    "name": "datacite",
    "title": "Datacite",
    "description": "Research results available via Datacite, a leading global non-profit organisation that provides persistent identifiers (DOIs) for research data and other research outputs.",
    "image_url": "https://assets.datacite.org/images/logo-big.png"
  },
  {
    "name": "zenodo",
    "title": "ZENODO",
    "description": "Research results available via Zenodo. Zenodo is a general purpose repository that enables researchers, scientists, projects and institutions to share, preserve and showcase multidisciplinary research results (data, software and publications) that are not part of the existing institutional or subject-based repositories of the research communities. It is founded in the trustworthy CERN data centre.",
    "image_url": "https://about.zenodo.org/static/img/logos/zenodo-gradient-1000.png"
  },
  {
    "name": "hal",
    "title": "Hyper Article en Ligne",
    "description": "Research results available via Hyper Article en Ligne (HAL). HAL is an open archive where authors can deposit scholarly documents from all academic fields.",
    "image_url": "https://hal.archives-ouvertes.fr/public/hal.logo.png"
  },
  {
    "name": "figshare",
    "title": "figshare",
    "description": "Research results available via figshare, a repository where users can make all of their research outputs available in a citable, shareable and discoverable manner.",
    "image_url": "https://website-p-eu.figstatic.com/assets/776d94c0a5a92799ce5536fd94e8e3c2d759a3c2/public/global/images/full-logo.png"
  },
  {
    "name": "digital.csic",
    "title": "DIGITAL.CSIC",
    "description": "Research results available via DIGITAL.CSIC, the institutional repository of the Spanish National Research Council.",
    "image_url": "https://digital.csic.es/imagenes/logo_DC_peque.png"
  },
  {
    "name": "dans",
    "title": "DANS - Data Archiving and Networked Services",
    "description": "Research results available via DANS, the Netherlands institute for permanent access to digital research resources.",
    "image_url": "https://easy.dans.knaw.nl/ui/images/lay-out/logo_dans.png"
  }
]
@@ -0,0 +1,30 @@

[
  {
    "paramName": "gct",
    "paramLongName": "gcattoken",
    "paramDescription": "the token for the deposition on the catalogue",
    "paramRequired": true
  },
  {
    "paramName": "p",
    "paramLongName": "hdfsPath",
    "paramDescription": "the path where the sequential file is stored",
    "paramRequired": true
  },
  {
    "paramName": "nn",
    "paramLongName": "hdfsNameNode",
    "paramDescription": "the name node on hdfs",
    "paramRequired": true
  },
  {
    "paramName": "gbu",
    "paramLongName": "gcatBaseUrl",
    "paramDescription": "the base url for the catalogue",
    "paramRequired": true
  }
]
@@ -0,0 +1,38 @@

[
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "tn",
    "paramLongName": "resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "cm",
    "paramLongName": "communityName",
    "paramDescription": "the name of the community for which to execute the dump to the catalogue",
    "paramRequired": true
  }
]
@@ -0,0 +1,30 @@

<configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>hiveMetastoreUris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>hiveJdbcUrl</name>
        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
    </property>
    <property>
        <name>hiveDbName</name>
        <value>openaire</value>
    </property>
    <property>
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
</configuration>
@ -0,0 +1,230 @@
|
||||||
|
<workflow-app name="dump_community_products_to_catalogue" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>sourcePath</name>
|
||||||
|
<description>the source path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>communityName</name>
|
||||||
|
<description>The name of the community for which execute the dump for the catalogue</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>gcattoken</name>
|
||||||
|
<description>the access token for the deposition on the cataloge</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>gcatBaseUrl</name>
|
||||||
|
<description>the baseUrl to access the catalogue</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<description>the target hive database name</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<description>hive server jdbc url</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<description>hive server metastore URIs</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozieActionShareLibForSpark2</name>
|
||||||
|
<description>oozie action sharelib for spark 2.*</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2ExtraListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||||
|
<description>spark 2.* extra listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||||
|
<description>spark 2.* sql query execution listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<description>spark 2.* yarn history server address</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<description>spark 2.* event log dir location</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.job.queuename</name>
|
||||||
|
<value>${queueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||||
|
<value>${oozieLauncherQueueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
|
<start to="reset_outputpath"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<action name="reset_outputpath">
|
||||||
|
<fs>
|
||||||
|
<delete path="${workingDir}"/>
|
||||||
|
<mkdir path="${workingDir}"/>
|
||||||
|
</fs>
|
||||||
|
<ok to="fork_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
<fork name="fork_dump">
|
||||||
|
<path start="dump_publication"/>
|
||||||
|
<path start="dump_dataset"/>
|
||||||
|
<path start="dump_orp"/>
|
||||||
|
<path start="dump_software"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
|
<action name="dump_publication">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table publication for RISIS related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
|
||||||
|
<arg>--communityName</arg><arg>${communityName}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_dataset">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table dataset for RISIS related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
|
||||||
|
<arg>--communityName</arg><arg>${communityName}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_orp">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table other for RISIS related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||||
|
<arg>--communityName</arg><arg>${communityName}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_software">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table software for RISIS related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
|
||||||
|
<arg>--communityName</arg><arg>${communityName}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<join name="join_dump" to="populate_catalogue"/>
|
||||||
|
|
||||||
|
<action name="populate_catalogue">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.gcat.SendToCatalogue</main-class>
|
||||||
|
<arg>--hdfsPath</arg><arg>${workingDir}</arg>
|
||||||
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--gcattoken</arg><arg>${gcattoken}</arg>
|
||||||
|
<arg>--gcatBaseUrl</arg><arg>${gcatBaseUrl}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
|
||||||
|
</workflow-app>
|
|
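Each of the four dump branches above invokes the same Spark class, changing only the result table name and the input/output sub-folders; once they all complete, join_dump hands control to populate_catalogue, which pushes the dumped records to the D4Science catalogue through the gCat API. The client behind that step is exercised by GCatAPIClientTest further down; a minimal, hypothetical publish of a single record (class name, token and file path are placeholders, the client methods and base URL are the ones used in that test) might look like:

import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.http.HttpStatus;

import eu.dnetlib.dhp.oa.graph.dump.gcat.GCatAPIClient;

public class PublishOneRecord {
	public static void main(String[] args) throws Exception {
		GCatAPIClient client = new GCatAPIClient();
		client.setApplicationToken("<application-token>"); // placeholder for ${gcattoken}
		client.setGcatBaseURL("https://gcat.d4science.org/gcat/"); // value used in the tests; see ${gcatBaseUrl}
		// one newline-delimited catalogue entry, as produced by the dump actions above
		String json = new String(Files.readAllBytes(Paths.get("/tmp/catalogue_entry.json")));
		int status = client.publish(json);
		System.out.println(status == HttpStatus.SC_CREATED ? "created" : "unexpected status " + status);
	}
}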
@ -0,0 +1,128 @@
package eu.dnetlib.dhp.oa.graph.gcat;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue;

public class DumpJobTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class);

	private static HashMap<String, String> map = new HashMap<>();

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(DumpJobTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(DumpJobTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}
	@Test
	public void testSoftware() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/software.json")
			.getPath();

		SparkDumpRISISCatalogue.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/result",
			"-sourcePath", sourcePath,
			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
			"-communityName", "risis"
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> tmp = sc
			.textFile(workingDir.toString() + "/result")
			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));

		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));

		Assertions.assertEquals(3, verificationDataset.count());
		verificationDataset.show(false);

		// verificationDataset.select("instance.type").show(false);

	}

	@Test
	public void testDataset() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/dataset_for_dump.json")
			.getPath();

		SparkDumpRISISCatalogue.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/result",
			"-sourcePath", sourcePath,
			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
			"-communityName", "science-innovation-policy"
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> tmp = sc
			.textFile(workingDir.toString() + "/result")
			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));

		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));

		Assertions.assertEquals(2, verificationDataset.count());
		verificationDataset.show(false);

		// verificationDataset.select("instance.type").show(false);

	}

}
@ -0,0 +1,231 @@
package eu.dnetlib.dhp.oa.graph.gcat;

import java.io.*;
import java.net.URISyntaxException;
import java.util.List;
import java.util.zip.GZIPInputStream;

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpStatus;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import com.google.common.collect.Lists;

import eu.dnetlib.dhp.oa.graph.dump.gcat.GCatAPIClient;

/**
 * NEVER EVER ENABLE THIS CLASS UNLESS YOU ABSOLUTELY KNOW WHAT YOU ARE DOING: with the proper parameters set it can
 * drop a D4Science Catalogue
 */
//@Disabled
public class GCatAPIClientTest {

	private static GCatAPIClient client;

	@BeforeAll
	public static void setup() {
		client = new GCatAPIClient();
		client.setApplicationToken("816486a3-60a9-4ecc-a7e0-a96740a90207-843339462");
		client.setGcatBaseURL("https://gcat.d4science.org/gcat/");
	}

	@Test
	public void testList() throws IOException, URISyntaxException {
		System.out.println(client.list(0, 10));
	}

	@Test
	public void testPublishAndPurge() throws IOException, URISyntaxException {
		// The 'name' must be between 2 and 100 characters long and contain only lowercase alphanumeric characters, '-'
		// and '_'.
		// You can validate your name using the regular expression : ^[a-z0-9_\\-]{2,100}$
		String objidentifier = "fake";
		String json = IOUtils
			.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_dat.json"));
		System.out.println("Creating item...");
		Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
		System.out.println("item created, now listing...");
		Assertions.assertEquals(1, client.list(0, 10).size());
		// and then drop it
		Assertions.assertTrue(client.purge(objidentifier));
		System.out.println("item purged");
	}

	@Test
	public void testPublish() throws IOException, URISyntaxException {
		// The 'name' must be between 2 and 100 characters long and contain only lowercase alphanumeric characters, '-'
		// and '_'.
		// You can validate your name using the regular expression : ^[a-z0-9_\\-]{2,100}$
		String json = IOUtils
			.toString(
				getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_software_20201130.json"));
		System.out.println("Creating item...");
		Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
		System.out.println("item created, now listing...");
		// Assertions.assertEquals(1, client.list(0, 10).size());

	}

	@Test
	public void bulkPublishORP() throws IOException, URISyntaxException {
		BufferedReader reader = new BufferedReader(new FileReader(getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_other.json")
			.getPath()));

		String line;
		while ((line = reader.readLine()) != null) {
			Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
		}
	}

	@Test
	public void bulkPublishDATS() throws IOException, URISyntaxException {

		BufferedReader reader = new BufferedReader(new FileReader(getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/dats_20201126")
			.getPath()));

		String line;
		int count = 1;
		while ((line = reader.readLine()) != null) {
			Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
			System.out.println(count);
			count++;
		}
	}

	@Test
	public void bulkPublishCompressedSW() throws IOException, URISyntaxException {

		BufferedReader reader = new BufferedReader(
			new InputStreamReader(new GZIPInputStream(new FileInputStream(getClass()
				.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/software_20201130.gz")
				.getPath()))));

		String line;
		int count = 1;
		while ((line = reader.readLine()) != null) {
			Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
			System.out.println(count);
			count++;
		}
	}

	@Test
	public void bulkPublishPUBS() throws IOException, URISyntaxException {
		BufferedReader reader = new BufferedReader(new FileReader(getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_publications")
			.getPath()));

		String line;
		while ((line = reader.readLine()) != null) {
			Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
		}
	}

	@Test
	public void purgeItem() throws IOException, URISyntaxException {
		String objidentifier = "dedup_wf_001--10160b3eafcedeb0a384fc400fe1c3fa";
		Assertions.assertTrue(client.purge(objidentifier));
		System.out.println("item purged");
	}

	@Test
	public void testPurgeUnexisting() throws IOException, URISyntaxException {
		String id = "1234";
		Assertions.assertFalse(client.purge(id));
	}

	@Test
	public void testPurgeAllEmptyCat() throws IOException, URISyntaxException {
		Assertions.assertEquals(179, client.purgeAll());
	}

	@Test
	public void testPublishAndPurgeAll() throws IOException, URISyntaxException {
		String json = IOUtils
			.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_pub.json"));
		Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
		System.out.println("item created, now listing...");
		Assertions.assertEquals(1, client.list(0, 10).size());
		// and then drop all
		Assertions.assertEquals(1, client.purgeAll());
	}

	@Test
	public void purgeList() throws IOException, URISyntaxException {
		List<String> toPurge = Lists.newArrayList();
		toPurge.add("dedup_wf_001--f20bb2f571f1fdcb9a66bec850a8267e");
		toPurge.add("od_______166--c5caa1b39d9c7998f0f7c37f948ea097");
		toPurge.add("od______2659--d956cae2b4a87eeaae4291530dfc88cf");
		toPurge.add("od______2659--50fdc84d38782630c8fe73ca66e4e1e9");
		toPurge.add("od______2659--1bbce6c3a47aa79cfc2c2973842d0c2c");
		toPurge.add("od______2659--037571fc3efb68d43ae8a4d8078ddd82");
		toPurge.add("od______2659--7bd3645a599d5c9fd5eb0a5c87b79948");
		toPurge.add("od______3379--50842bb0a03d644a6ed831c0a59d25f4");
		toPurge.add("od_______177--50842bb0a03d644a6ed831c0a59d25f4");
		toPurge.add("od______4325--d6ad4e4111afd06f69c1597a60f09cef");
		toPurge.add("od______3379--d6ad4e4111afd06f69c1597a60f09cef");
		toPurge.add("od_______177--d6ad4e4111afd06f69c1597a60f09cef");
		toPurge.add("od______4325--50842bb0a03d644a6ed831c0a59d25f4");
		toPurge.add("od______4325--b823dc448d06160da67ebdcd1a67c544");
		toPurge.add("od_______177--b823dc448d06160da67ebdcd1a67c544");
		toPurge.add("od______1106--7407d45261b901f936319762b30a66f0");
		toPurge.add("od______3379--b823dc448d06160da67ebdcd1a67c544");
		toPurge.add("od______4325--655f1b9517a0dd16efd05b572f66927b");
		toPurge.add("od______3379--655f1b9517a0dd16efd05b572f66927b");
		toPurge.add("od_______177--655f1b9517a0dd16efd05b572f66927b");
		toPurge.add("od______3379--308718c4498f1c857d9dec8fc8412bed");
		toPurge.add("od_______177--308718c4498f1c857d9dec8fc8412bed");
		toPurge.add("od______4325--308718c4498f1c857d9dec8fc8412bed");
		toPurge.add("od______4325--26025853fbcb01858d58e3c268d144ce");
		toPurge.add("od_______177--26025853fbcb01858d58e3c268d144ce");
		toPurge.add("od______3379--4af67cb057f92a8c276c3aae56980430");
		toPurge.add("od_______177--4af67cb057f92a8c276c3aae56980430");
		toPurge.add("od______4325--4af67cb057f92a8c276c3aae56980430");
		toPurge.add("od______3379--1c384c7771d6d5ec0b2b14264d0af8cd");
		toPurge.add("od_______177--1c384c7771d6d5ec0b2b14264d0af8cd");
		toPurge.add("od______4325--1c384c7771d6d5ec0b2b14264d0af8cd");
		toPurge.add("od_______177--15a65e2433929cc77ae7b10fd56c1e9e");
		toPurge.add("od______4325--15a65e2433929cc77ae7b10fd56c1e9e");
		toPurge.add("od______3379--15a65e2433929cc77ae7b10fd56c1e9e");
		toPurge.add("od_______177--c5caa1b39d9c7998f0f7c37f948ea097");
		toPurge.add("od______3379--c5caa1b39d9c7998f0f7c37f948ea097");
		toPurge.add("od_______177--11df09fd7a9ad36f0de546ea991182ce");
		toPurge.add("od______3379--11df09fd7a9ad36f0de546ea991182ce");
		toPurge.add("od______4325--11df09fd7a9ad36f0de546ea991182ce");
		toPurge.add("dedup_wf_001::2fb2a1e02bfae184789181009d59232c");
		toPurge.add("od_______177--39688c4bc9fd811e55f914e58701536d");
		toPurge.add("od______3379--39688c4bc9fd811e55f914e58701536d");
		toPurge.add("od______4325--39688c4bc9fd811e55f914e58701536d");
		toPurge.add("od______4325--200c9c70dff2e86dad6ba555381027fc");
		toPurge.add("od______3379--200c9c70dff2e86dad6ba555381027fc");
		toPurge.add("od_______177--200c9c70dff2e86dad6ba555381027fc");
		toPurge.add("od______3379--cf54f6149e7427d77dd37fccc3a0c747");
		toPurge.add("od______3379--1f50518c04e2c0966425a350def1f82a");
		toPurge.add("od______4325--1f50518c04e2c0966425a350def1f82a");
		toPurge.add("od_______177--1f50518c04e2c0966425a350def1f82a");
		toPurge.add("od______3379--bff6c5b35457f7f51d52d41323226663");
		toPurge.add("od_______177--bff6c5b35457f7f51d52d41323226663");
		toPurge.add("od______4325--bff6c5b35457f7f51d52d41323226663");
		toPurge.add("od______2659--cbdcbe3865868c15680d95c7f83c3ff7");
		toPurge.add("dedup_wf_001--569da4f719f51eb07f23548000e57d72");
		toPurge.add("od_______177--bf88e4876d9c5a9720ca3fefe3ce93ea");
		toPurge.add("od______3379--bf88e4876d9c5a9720ca3fefe3ce93ea");
		toPurge.add("od______4325--bf88e4876d9c5a9720ca3fefe3ce93ea");
		for (String name : toPurge) {
			System.out.println("Purging " + name);
			if (client.purge(name)) {
				System.out.println("Purged");
			} else {
				System.out.println("Failed");
			}
		}

	}
}
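The comments in testPublishAndPurge and testPublish above quote the catalogue's constraint on the 'name' field: 2 to 100 characters, lowercase alphanumerics, '-' and '_'. A small self-contained check of that rule, using the regular expression quoted in those comments (class and method names are illustrative):

import java.util.regex.Pattern;

public class CatalogueNameCheck {

	// constraint quoted in the test comments above
	private static final Pattern NAME = Pattern.compile("^[a-z0-9_\\-]{2,100}$");

	public static boolean isValid(String name) {
		return name != null && NAME.matcher(name).matches();
	}

	public static void main(String[] args) {
		System.out.println(isValid("dedup_wf_001--10160b3eafcedeb0a384fc400fe1c3fa")); // true
		System.out.println(isValid("Contains::Colons")); // false: uppercase letters and ':' are not allowed
	}
}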
File diff suppressed because one or more lines are too long
@ -0,0 +1,2 @@
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Lepori, Benedetto"},{"key":"Risis2_Attribution:Author","value":"Guerini, Massimilano"},{"key":"AccessMode:Access Right","value":"Open Access"},{"key":"Risis2_Attribution:Contributor","value":"European Commission"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"English"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.3752861"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.3752860"},{"key":"Risis2_Publishing:Publication Date","value":"2020-04-15"},{"key":"Risis2_Publishing:Publisher","value":"Zenodo"},{"key":"Risis2_Publishing:Collected From","value":"Zenodo; ZENODO; Datacite"},{"key":"Risis2_Publishing:Hosted By","value":"Zenodo; ZENODO"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.3752861"},{"key":"Identity:URL","value":"https://zenodo.org/record/3752861"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.3752860"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Dataset"}],"groups":[{"name":"open"},{"name":"zenodo"},{"name":"datacite"}],"license_id":"notspecified","name":"dedup_wf_001--c4634a42d4b98e594e0796a41b47ec61","notes":"<p>This file provides the correspondence table between EUROSTAT NUTS3 classification and the adapted regional classification used by the RISIS-KNOWMAK project. This regional classification fits the structure of knowledge production in Europe and addresses some knowm problems of the NUTS3 classification, such as the treatment of large agglomerations, while remaining fully compatible with the EUROSTAT NUTS regional classification. This compatibility allows combining all KNOWMAK data with regional statistics (at NUTS3 level, 2016 edition) from EUROSTAT.</p>\n\n<p>More precisely, the classification includes EUROSTAT metropolitan regions (based on the aggregation of NUTS3-level regions) and NUTS2 regions for the remaining areas; further, a few additional centers for knowledge production, like Oxford and Leuven, have been singled out at NUTS3 level. The resulting classification is therefore more fine-grained than NUTS2 in the areas with sizeable knowledge production, but at the same time recognizes the central role of metropolitan areas in knowledge production. While remaining compatible with NUTS, the classification allows addressing two well-known shortcomings: a) the fact that some large cities are split between NUTS regions (London) and b) the fact that NUTS3 classification in some countries includes many very small regions, as in the case of Germany</p>","tags":[],"title":"RISIS-KNOWMAK NUTS adapted classification","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::c4634a42d4b98e594e0796a41b47ec61","version":""}
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Laredo, Philippe, 0000-0002-5014-9132"},{"key":"AccessMode:Access Right","value":"Open Access"},{"key":"Risis2_Attribution:Contributor","value":"European Commission"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"English"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.2560116"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.2560117"},{"key":"Risis2_Publishing:Publication Date","value":"2019-02-08"},{"key":"Risis2_Publishing:Publisher","value":"Zenodo"},{"key":"Risis2_Publishing:Collected From","value":"ZENODO; Datacite; figshare"},{"key":"Risis2_Publishing:Hosted By","value":"Zenodo; ZENODO; figshare"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.2560117"},{"key":"Identity:URL","value":"https://zenodo.org/record/2560117"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.2560116"},{"key":"Identity:URL","value":"https://figshare.com/articles/Introduction_of_RISIS_project_by_Philippe_Laredo/7699286"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Audiovisual"}],"groups":[{"name":"open"},{"name":"zenodo"},{"name":"figshare"},{"name":"datacite"}],"license_id":"notspecified","name":"dedup_wf_001--10160b3eafcedeb0a384fc400fe1c3fa","notes":"<p>Introduction of RISIS project by Philippe Laredo</p>","tags":[],"title":"Introduction of RISIS project by Philippe Laredo","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::10160b3eafcedeb0a384fc400fe1c3fa","version":"None"}
@ -0,0 +1 @@
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Akol, Angela, 0000-0003-4594-3478"},{"key":"Risis2_Attribution:Author","value":"Moland, Karen"},{"key":"Risis2_Attribution:Author","value":"Babirye, Juliet"},{"key":"Risis2_Attribution:Author","value":"Engebretsen, Ingunn, 0000-0001-5852-3611"},{"key":"AccessMode:Access Right","value":"not available"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"Undetermined"},{"key":"Identity:PID","value":"https://www.doi.org/10.6084/m9.figshare.c.4064003"},{"key":"Identity:PID","value":"https://www.doi.org/10.6084/m9.figshare.c.4064003.v1"},{"key":"Risis2_Publishing:Publication Date","value":"2018-04-10"},{"key":"Risis2_Publishing:Publisher","value":"Figshare"},{"key":"Risis2_Publishing:Collected From","value":"Datacite"},{"key":"Risis2_Publishing:Hosted By","value":"figshare"},{"key":"Identity:URL","value":"https://dx.doi.org/10.6084/m9.figshare.c.4064003.v1"},{"key":"Identity:URL","value":"https://dx.doi.org/10.6084/m9.figshare.c.4064003"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Dataset"},{"key":"keyword","value":"FOS: Biological sciences"}],"groups":[{"name":"figshare"},{"name":"datacite"}],"license_id":"notspecified","name":"dedup_wf_001--7151b1070802f6ed0ced85a5b175b368","notes":"Abstract Background Early identification and management of mental illness in childhood and adolescence helps to avert debilitating mental illness in adulthood but the attention given to Child and Adolescent Mental Health (CAMH) has until recently been low. Traditional healers are often consulted by patients with mental illness and in Uganda, up to 60% of patients attending traditional healers have moderate to severe mental illness. Poor access to CAMH care in Uganda creates a treatment gap that could be met through enhanced collaboration between traditional healers and biomedical health systems. The aim of this study was to explore traditional healers’ views on their collaboration with biomedical health systems so as to inform the implementation of strategies to improve access to CAMH services in Uganda. Methods In-depth interviews with 20 purposively selected traditional healers were conducted in November 2015. A semi-structured interview guide was used to explore: 1) The experiences of traditional healers with mental ill-health in children and adolescents; 2) their willingness to collaborate with the formal health system; and 3) their perception of clinicians’ willingness to collaborate with them. Interviews were conducted in local languages and tape recorded. Data were analysed using thematic analysis. Results Traditional healers described several experiences managing children and adolescents with mental illness, which they ascribed to spiritual and physical causes. The spiritual explanations were a consequence of unhappy ancestral spirits, modern religions and witchcraft, while physical causes mentioned included substance abuse and fevers. No traditional healer had received a patient referred to them from a medical clinic although all had referred patients to clinics for non-mental health reasons. Traditional healers expressed distrust in biomedical health systems and believed their treatments were superior to medical therapies in alleviating mental suffering. They expressed willingness to collaborate with biomedical providers. However, traditional healers believe clinicians disregard them and would not be willing to collaborate with them. 
Conclusion Potential for collaboration between traditional healers and biomedical health systems for improving access to CAMH services in Uganda exists, but is undermined by mutual mistrust and competition between traditional healers and clinicians.","tags":[{"name":"Medicine"},{"name":"Biotechnology"},{"name":"69999 Biological Sciences not elsewhere classified"},{"name":"mesheuropmc.education"},{"name":"Cancer"},{"name":"Science Policy"}],"title":"“We are like co-wives”: Traditional healers' views on collaborating with the formal Child and Adolescent Mental Health System in Uganda","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::7151b1070802f6ed0ced85a5b175b368","version":""}
File diff suppressed because one or more lines are too long
@ -0,0 +1,68 @@
{
    "name": "nstest--test",
    "private": false,
    "license_id": "notspecified",
    "version": "",
    "title": "The role of R&D networks for exploitative and explorative regional knowledge creation",
"notes": "<p>The beneficial effect of R&D networks on regional knowledge creation is widely undenied. They constitute essential means to create new knowledge through collaborative research efforts and enable access to new knowledge by bridging the way to region-external knowledge bases. However, we argue that the significance and strength of the effect differs for different modes of knowledge creation – exploitative and explorative – as well as for the quantity and quality of knowledge created. To explore these differences, we estimate a set of spatial autoregressive (SAR) models for European regions with varying network effects that are based on a region’s network centrality in the cross-region R&D network of the EU Framework Programme (FP). The results point consistently to a higher positive impact of reginal network centralities on explorative than exploitative knowledge creation. Moreover, the quantity and quality of newly created knowledge is found to be conversely affected by the regional network centralities considered. Interestingly, a high number of links (degree centrality) has in relative terms higher positive effects on the quality, rather than the pure quantity of knowledge outputs, while an authoritative network position is more conducive for increasing the quantity than the quality of knowledge.</p>",
    "url": "https://beta.risis.openaire.eu/search/publication?articleId=od______2659::155332689ed5defb5d9a68a42fd8cd14",
    "maintainer": "",
    "extras": [
        {
            "key": "Publisher",
            "value": "Zenodo"
        },
        {
            "key": "Access right",
            "value": "Open Access"
        },
        {
            "key": "Collected from",
            "value": "ZENODO"
        },
        {
            "key": "PID",
            "value": "doi:10.5281/zenodo.3724562"
        },
        {
            "key": "Author",
            "value": "Neuländtner, Martina"
        },
        {
            "key": "Author",
            "value": "Scherngell, Thomas"
        },
        {
            "key": "Type",
            "value": "publication"
        },
        {
            "key": "Language",
            "value": "Undetermined"
        },
        {
            "key": "Country",
            "value": ""
        },
        {
            "key": "Subject",
            "value": "R&D networks, modes of knowledge creation, exploitation, exploration, spatial autoregressive model"
        },
        {
            "key": "Publication date",
            "value": "2020-01-01"
        },
        {
            "key": "Resource type",
            "value": ""
        },
        {
            "key": "URL",
            "value": "http://dx.doi.org/10.5281/zenodo.3724562"
        },
        {
            "key": "Hosted by",
            "value": "ZENODO"
        }
    ]
}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,92 @@
{
    "extras": [
        {
            "key": "system:type",
            "value": "dataset"
        },
        {
            "key": "Risis2_Attribution:Author",
            "value": "Laredo, Philippe, 0000-0002-5014-9132"
        },
        {
            "key": "AccessMode:Access Right",
            "value": "Open Access"
        },
        {
            "key": "Risis2_Attribution:Contributor",
            "value": "European Commission"
        },
        {
            "key": "AccessMode:Embargo End Date",
            "value": ""
        },
        {
            "key": "Language",
            "value": "English"
        },
        {
            "key": "Identity:PID",
            "value": "https://www.doi.org/10.5281/zenodo.2560116"
        },
        {
            "key": "Identity:PID",
            "value": "https://www.doi.org/10.5281/zenodo.2560117"
        },
        {
            "key": "Risis2_Publishing:Publication Date",
            "value": "2019-02-08"
        },
        {
            "key": "Risis2_Publishing:Publisher",
            "value": "Zenodo"
        },
        {
            "key": "Risis2_Publishing:Collected From",
            "value": "ZENODO; Datacite; figshare"
        },
        {
            "key": "Risis2_Publishing:Hosted By",
            "value": "Zenodo; ZENODO; figshare"
        },
        {
            "key": "Identity:URL",
            "value": "http://dx.doi.org/10.5281/zenodo.2560117"
        },
        {
            "key": "Identity:URL",
            "value": "https://zenodo.org/record/2560117"
        },
        {
            "key": "Identity:URL",
            "value": "http://dx.doi.org/10.5281/zenodo.2560116"
        },
        {
            "key": "Identity:URL",
            "value": "https://figshare.com/articles/Introduction_of_RISIS_project_by_Philippe_Laredo/7699286"
        },
        {
            "key": "Country",
            "value": ""
        },
        {
            "key": "Format",
            "value": ""
        },
        {
            "key": "Resource Type",
            "value": "Audiovisual"
        }
    ],
    "groups": [
        {"name": "open"},
        {"name": "zenodo"},
        {"name": "figshare"}
    ],
    "license_id": "notspecified",
    "name": "dedup_wf_001--10160b3eafcedeb0a384fc400fe1c3fa",
    "notes": "<p>Introduction of RISIS project by Philippe Laredo<\/p>",
    "tags": [],
    "title": "Introduction of RISIS project by Philippe Laredo",
    "url": "https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::10160b3eafcedeb0a384fc400fe1c3fa",
    "version": "None"
}
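The pretty-printed record above has the same shape as the CatalogueEntry records that the dump writes and that DumpJobTest reads back. A minimal Jackson round-trip sketch, assuming the bean exposes standard getters and setters for exactly the keys shown (the file path is a placeholder):

import java.nio.file.Files;
import java.nio.file.Paths;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry;

public class CatalogueEntryRoundTrip {
	public static void main(String[] args) throws Exception {
		ObjectMapper mapper = new ObjectMapper();
		// hypothetical path to a record shaped like the one above
		String json = new String(Files.readAllBytes(Paths.get("/tmp/gcat_entry.json")));
		CatalogueEntry entry = mapper.readValue(json, CatalogueEntry.class);
		// re-serialize to check that nothing is lost in the mapping
		System.out.println(mapper.writeValueAsString(entry));
	}
}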
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long