Merge branch 'd4science' of code-repo.d4science.org:miriam.baglioni/dnet-hadoop into d4science

This commit is contained in:
Miriam Baglioni 2020-06-23 11:16:25 +02:00
commit f12b1ede24
4 changed files with 207 additions and 49 deletions

View File

@ -6,8 +6,7 @@ import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import javax.ws.rs.HttpMethod;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpEntity;
@ -15,13 +14,13 @@ import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
import com.google.gson.Gson;
@ -35,6 +34,7 @@ public class GCatAPIClient {
private static final Log log = LogFactory.getLog(GCatAPIClient.class);
public static final int BULK_SIZE = 100;
private String gcatBaseURL;
private final String itemPath = "items";
private String applicationToken;
@ -43,23 +43,27 @@ public class GCatAPIClient {
}
/**
* Publish the json as in the D4science catalogue as an item. TODO: does the POST returns the whole item or just its
* catalogue identifier?
* Publish the json as in the D4science catalogue as an item.
*
* @param jsonMetadata
* @param jsonMetadata the whole published json record
* @return the HTTP status code of the request
* @throws IOException
*/
public int publish(final String jsonMetadata) throws IOException {
public int publish(final String jsonMetadata) throws IOException, URISyntaxException {
try (CloseableHttpClient client = HttpClients.createDefault()) {
HttpPost post = new HttpPost(getGcatBaseURL() + itemPath);
URIBuilder builder = new URIBuilder(getGcatBaseURL() + itemPath);
HttpPost post = new HttpPost(builder.build());
post.setHeader("gcube-token", getApplicationToken());
post.addHeader("Content-Type", "application/json");
post.addHeader("Accept", "application/json");
StringEntity entity = new StringEntity(jsonMetadata, StandardCharsets.UTF_8);
post.setEntity(entity);
HttpResponse response = client.execute(post);
System.out.println(response.getStatusLine().getStatusCode());
if (log.isDebugEnabled()) {
log.debug(response.getStatusLine());
log.debug(IOUtils.toString(response.getEntity().getContent()));
}
return response.getStatusLine().getStatusCode();
}
}
@ -69,7 +73,7 @@ public class GCatAPIClient {
*
* @param offset offset
* @param limit limit
* @return list of json items
* @return list of catalogue item names
* @throws IOException
* @throws URISyntaxException
*/
@ -98,12 +102,53 @@ public class GCatAPIClient {
}
};
String responseBody = client.execute(get, responseHandler);
System.out.println(responseBody);
Gson gson = new Gson();
return gson.fromJson(responseBody, List.class);
}
}
/**
 * Purges from the catalogue the item whose name is derived from the given object identifier.
 *
 * @param objidentifier the object identifier; it is mapped to a catalogue item name before the request is sent
 * @return true only when the catalogue answers with HTTP 204 (No Content), false for any other status code
 * @throws IllegalArgumentException if objidentifier is null or empty
 * @throws IOException if the HTTP request cannot be executed
 * @throws URISyntaxException if the resulting item URL is not a valid URI
 */
public boolean purge(final String objidentifier) throws IOException, URISyntaxException {
    // An empty name would turn the request into a DELETE on the bare items collection URL with
    // purge=true; refuse it up front instead of finding out what the server does with that.
    if (objidentifier == null || objidentifier.isEmpty()) {
        throw new IllegalArgumentException("objidentifier must not be null or empty");
    }
    String resCatName = getCatalogueNameFrom(objidentifier);
    try (CloseableHttpClient client = HttpClients.createDefault()) {
        URIBuilder builder = new URIBuilder(getGcatBaseURL() + itemPath + "/" + resCatName)
            .addParameter("purge", "true");
        HttpDelete del = new HttpDelete(builder.build());
        del.setHeader("gcube-token", getApplicationToken());
        del.addHeader("Content-Type", "application/json");
        del.addHeader("Accept", "application/json");
        HttpResponse response = client.execute(del);
        if (log.isDebugEnabled()) {
            log.debug(response.getStatusLine());
        }
        return response.getStatusLine().getStatusCode() == HttpStatus.SC_NO_CONTENT;
    }
}
/**
 * Purges every item returned by the catalogue listing, working in pages of {@link #BULK_SIZE} names.
 *
 * Items whose purge fails are logged and counted, then skipped: each new page is requested with an offset
 * equal to the number of failures so far, so that a failing item cannot keep the loop running forever.
 * NOTE(review): this assumes the listing order is stable between calls - confirm against the gCat API.
 *
 * @return the number of items that were successfully purged
 * @throws IOException if a list or purge request cannot be executed
 * @throws URISyntaxException if a request URL is not a valid URI
 */
public int purgeAll() throws IOException, URISyntaxException {
    int count = 0;
    int deleted = 0;
    int failed = 0;
    List<String> list = list(failed, BULK_SIZE);
    // A null list (e.g. an empty response body) is treated like an empty catalogue.
    while (list != null && !list.isEmpty()) {
        for (String itemName : list) {
            count++;
            if (purge(itemName)) {
                deleted++;
            } else {
                failed++;
                log.warn("Deletion of item " + itemName + " failed");
            }
        }
        // Items that could not be purged are still in the catalogue: page past them.
        list = list(failed, BULK_SIZE);
    }
    log.info(String.format("PurgeAll completed: total = %d; deleted = %d; failed = %d", count, deleted, failed));
    return deleted;
}
/**
 * Derives the catalogue item name from an object identifier by replacing every "::" with "_".
 *
 * @param objIdentifier the object identifier
 * @return the identifier with each "::" occurrence turned into "_"
 */
protected String getCatalogueNameFrom(final String objIdentifier) {
    final String catalogueName = objIdentifier.replace("::", "_");
    return catalogueName;
}
/**
 * @return the base URL currently configured for the catalogue
 */
public String getGcatBaseURL() {
    return this.gcatBaseURL;
}

View File

@ -1,38 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump;
import java.io.IOException;
import java.net.URISyntaxException;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.oa.graph.dump.gcat.GCatAPIClient;
/**
 * Manual tests for {@link GCatAPIClient}; the JUnit annotations are commented out, so nothing here runs
 * unless they are re-enabled by hand.
 *
 * TODO: ask for a token for the dev gcat
 */
public class GCatAPIClientTest {

    private static GCatAPIClient client;

    // @BeforeAll
    public static void setup() {
        client = new GCatAPIClient();
        client.setApplicationToken("");
        client.setGcatBaseURL("https://gcat.d4science.org/gcat/");
    }

    // @Test
    public void testList() throws IOException, URISyntaxException {
        System.out.println(client.list(0, 10));
    }

    // @Test
    public void testPublish() throws IOException, URISyntaxException {
        // The leading '/' makes the lookup absolute on the classpath; without it the path is resolved
        // relative to this class' package and the stream comes back null.
        String json = IOUtils
            .toString(
                getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_pub.json"),
                java.nio.charset.StandardCharsets.UTF_8);
        client.publish(json);
    }
}

View File

@ -0,0 +1,83 @@
package eu.dnetlib.dhp.oa.graph.gcat;
import java.io.IOException;
import java.net.URISyntaxException;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpStatus;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.oa.graph.dump.gcat.GCatAPIClient;
import sun.jvm.hotspot.utilities.Assert;
/**
* NEVER EVER ENABLE THIS CLASS UNLESS YOU ABSOLUTELY KNOW WHAT YOU ARE DOING: with the proper parameters set it can
* dropped a D4Science Catalogue
*/
@Disabled
public class GCatAPIClientTest {
private static GCatAPIClient client;
@BeforeAll
public static void setup() {
client = new GCatAPIClient();
client.setApplicationToken("");
client.setGcatBaseURL("");
}
@Test
public void testList() throws IOException, URISyntaxException {
System.out.println(client.list(0, 10));
}
@Test
public void testPublishAndPurge() throws IOException, URISyntaxException {
// The 'name' must be between 2 and 100 characters long and contain only lowercase alphanumeric characters, '-'
// and '_'.
// You can validate your name using the regular expression : ^[a-z0-9_\\-]{2,100}$
String objidentifier = "nstest::test";
String json = IOUtils
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_pub.json"));
Assert.that(client.publish(json) == HttpStatus.SC_CREATED, "Item not created");
System.out.println("item created, now listing...");
Assert.that(client.list(0, 10).size() == 1, "List has more elements than expected");
// and then drop it
Assert
.that(
client.purge(objidentifier),
"It should return true! I just created item with name from " + objidentifier);
System.out.println("item purged");
}
@Test
public void testPurgeUnexisting() throws IOException, URISyntaxException {
String id = "1234";
Assert.that(!client.purge(id), "It should return false! The item does not exist");
}
@Test
public void testPurgeAllEmptyCat() throws IOException, URISyntaxException {
Assert
.that(
0 == client.purgeAll(),
"Expected 0 elements in the catalogue...we dropped anything that was there...sorry");
}
@Test
public void testPublishAndPurgeAll() throws IOException, URISyntaxException {
String json = IOUtils
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_pub.json"));
Assert.that(client.publish(json) == HttpStatus.SC_CREATED, "Item not created");
System.out.println("item created, now listing...");
Assert.that(client.list(0, 10).size() == 1, "List has more elements than expected");
// and then drop all
Assert
.that(
1 == client.purgeAll(),
"Expected 1 elements in the catalogue...we dropped anything that was there...sorry");
}
}

View File

@ -0,0 +1,68 @@
{
"name": "nstest_test",
"private": false,
"license_id": "notspecified",
"version": "",
"title": "The role of R&D networks for exploitative and explorative regional knowledge creation",
"notes": "<p>The beneficial effect of R&amp;D networks on regional knowledge creation is widely undenied. They constitute essential means to create new knowledge through collaborative research efforts and enable access to new knowledge by bridging the way to region-external knowledge bases. However, we argue that the significance and strength of the effect differs for different modes of knowledge creation &ndash; exploitative and explorative &ndash; as well as for the quantity and quality of knowledge created. To explore these differences, we estimate a set of spatial autoregressive (SAR) models for European regions with varying network effects that are based on a region&rsquo;s network centrality in the cross-region R&amp;D network of the EU Framework Programme (FP). The results point consistently to a higher positive impact of reginal network centralities on explorative than exploitative knowledge creation. Moreover, the quantity and quality of newly created knowledge is found to be conversely affected by the regional network centralities considered. Interestingly, a high number of links (degree centrality) has in relative terms higher positive effects on the quality, rather than the pure quantity of knowledge outputs, while an authoritative network position is more conducive for increasing the quantity than the quality of knowledge.</p>",
"url": "https://beta.risis.openaire.eu/search/publication?articleId=od______2659::155332689ed5defb5d9a68a42fd8cd14",
"maintainer": "",
"extras": [
{
"key": "Publisher",
"value": "Zenodo"
},
{
"key": "Access right",
"value": "Open Access"
},
{
"key": "Collected from",
"value": "ZENODO"
},
{
"key": "PID",
"value": "doi:10.5281/zenodo.3724562"
},
{
"key": "Author",
"value": "Neuländtner, Martina"
},
{
"key": "Author",
"value": "Scherngell, Thomas"
},
{
"key": "Type",
"value": "publication"
},
{
"key": "Language",
"value": "Undetermined"
},
{
"key": "Country",
"value": ""
},
{
"key": "Subject",
"value": "R&D networks, modes of knowledge creation, exploitation, exploration, spatial autoregressive model"
},
{
"key": "Publication date",
"value": "2020-01-01"
},
{
"key": "Resource type",
"value": ""
},
{
"key": "URL",
"value": "http://dx.doi.org/10.5281/zenodo.3724562"
},
{
"key": "Hosted by",
"value": "ZENODO"
}
]
}