mapping adaptations

This commit is contained in:
Miriam Baglioni 2020-07-01 17:41:58 +02:00
parent 42ee1ef284
commit 9864bff488
13 changed files with 600 additions and 454 deletions

View File

@ -0,0 +1,4 @@
package eu.dnetlib.dhp.schema.dump.gcat;

import java.io.Serializable;

/**
 * A gCat catalogue group associated to a {@code CatalogueEntry}, serialised in
 * the dump JSON as e.g. {@code {"name": "open"}}.
 * <p>
 * NOTE(review): this class was committed empty while {@code Mapper} already
 * calls {@code Group.newInstance(null, name)}; the members below restore
 * compilability. The first factory argument is modelled as an optional
 * identifier (callers currently always pass {@code null}) — confirm against
 * the gCat API schema.
 */
public class Group implements Serializable {

	// optional group identifier; Mapper currently always passes null
	private String id;

	// group name, e.g. "open" or a publisher such as "zenodo" (lower-cased by the caller)
	private String name;

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	/**
	 * Static factory matching the call sites in Mapper.
	 *
	 * @param id   optional group identifier (may be null)
	 * @param name the group name
	 * @return a new Group carrying the given id and name
	 */
	public static Group newInstance(String id, String name) {
		Group g = new Group();
		g.setId(id);
		g.setName(name);
		return g;
	}
}

View File

@ -0,0 +1,4 @@
package eu.dnetlib.dhp.schema.dump.gcat;

import java.io.Serializable;

/**
 * A gCat catalogue tag attached to a {@code CatalogueEntry}.
 * <p>
 * NOTE(review): this class was committed empty while {@code Mapper} already
 * calls {@code Tag.newInstance(tag)} with the sanitised tag string; modelled
 * here as a single CKAN-style name field ({@code {"name": ...}}) — confirm
 * against the gCat API schema.
 */
public class Tag implements Serializable {

	// the tag text, already sanitised/validated by Mapper.tagReplacements
	private String name;

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	/**
	 * Static factory matching the call sites in Mapper.
	 *
	 * @param name the tag text
	 * @return a new Tag with the given name
	 */
	public static Tag newInstance(String name) {
		Tag t = new Tag();
		t.setName(name);
		return t;
	}
}

View File

@ -14,6 +14,7 @@ import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
@ -21,6 +22,8 @@ import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
@ -63,11 +66,13 @@ public class GCatAPIClient {
StringEntity entity = new StringEntity(jsonMetadata, StandardCharsets.UTF_8);
post.setEntity(entity);
HttpResponse response = client.execute(post);
if (log.isDebugEnabled()) {
log.debug(response.getStatusLine());
System.out.println(response.getStatusLine());
log.debug(IOUtils.toString(response.getEntity().getContent()));
}
return response.getStatusLine().getStatusCode();
}
}

View File

@ -3,14 +3,18 @@ package eu.dnetlib.dhp.oa.graph.dump.gcat;
import java.io.Serializable;
import java.io.StringWriter;
import java.security.Key;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.avro.generic.GenericData;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry;
import eu.dnetlib.dhp.schema.dump.gcat.Group;
import eu.dnetlib.dhp.schema.dump.gcat.Tag;
import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.*;
@ -19,6 +23,10 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class Mapper implements Serializable {
private static final List<String> publishers = Arrays
.asList("zenodo", "hal", "figshare", "inria", "digital.csic", "dans");
private static final List<String> access = Arrays.asList("open", "closed", "embargoed", "restricted");
public static <I extends eu.dnetlib.dhp.schema.oaf.Result> CatalogueEntry map(I input) {
final CatalogueEntry out = new CatalogueEntry();
@ -28,26 +36,25 @@ public class Mapper implements Serializable {
Set<String> cfSet = new HashSet<>();
Set<String> hbSet = new HashSet<>();
Set<String> countrySet = new HashSet<>();
Set<String> groups = new HashSet<>();
List<Group> groupList = new ArrayList<>();
if (ort.isPresent()) {
switch (ort.get().getClassid()) {
case "publication":
externals
.add(
KeyValue
.newInstance(
"Journal", Optional
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal())
.map(value -> value.getName() + ", " + value.getVol() + ", " + value.getIss())
.orElse("")));
// if (journal.isPresent()) {
// Journal j = journal.get();
// KeyValue kv = new KeyValue();
// kv.setKey("Journal");
// kv.setValue(j.getName() + ", " + j.getVol() + ", " + j.getIss());
// externals.add(kv);
// }
Optional<Journal> oJournal = Optional
.ofNullable(((Publication) input).getJournal());
if (oJournal.isPresent()) {
Journal value = oJournal.get();
externals
.add(
KeyValue
.newInstance(
"Risis2_Publishing:Journal",
// "Journal",
value.getName() + ", " + value.getVol() + ", " + value.getIss()));
}
out.setUrl(Constants.PUBLICATION_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("Result Type", "publication"));
externals.add(KeyValue.newInstance("system:type", "publication"));
break;
case "dataset":
eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
@ -58,7 +65,7 @@ public class Mapper implements Serializable {
.map(v -> v.getValue())
.orElse(""));
out.setUrl(Constants.DATASET_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("Result Type", "dataset"));
externals.add(KeyValue.newInstance("system:type", "dataset"));
break;
case "software":
eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
@ -80,12 +87,12 @@ public class Mapper implements Serializable {
// .ifPresent(
// value -> externals.add(KeyValue.newInstance("Programming Language", value.getClassname())));
out.setUrl(Constants.SOFTWARE_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("Result Type", "software"));
externals.add(KeyValue.newInstance("system:type", "software"));
break;
case "other":
out.setUrl(Constants.ORP_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("Result Type", "other"));
externals.add(KeyValue.newInstance("system:type", "other"));
break;
}
@ -96,23 +103,28 @@ public class Mapper implements Serializable {
.ofNullable(input.getAuthor());
List<String> authList = new ArrayList<>();
if (oauth.isPresent()) {
authList = oauth.get().stream().map(v -> v.getFullname()).collect(Collectors.toList());
authList = oauth.get().stream().map(v -> getAuthor(v)).collect(Collectors.toList());
}
if (authList.size() == 0) {
externals.add(KeyValue.newInstance("Author", ""));
} else {
authList.forEach(a -> externals.add(KeyValue.newInstance("Author", a)));
if (authList.size() > 0) {
authList.forEach(a -> externals.add(KeyValue.newInstance("Risis2_Attribution:Author", a)));
// authList.forEach(a -> externals.add(KeyValue.newInstance("Author", a)));
}
externals
.add(
KeyValue
.newInstance(
"Access Right", Optional
.ofNullable(input.getBestaccessright())
.map(
value -> value.getClassname())
.orElse("")));
String accessr = Optional
.ofNullable(input.getBestaccessright())
.map(
value -> value.getClassid())
.orElse("");
if (access.contains(accessr.toLowerCase())) {
groupList.add(Group.newInstance(null, accessr.toLowerCase().trim()));
}
if (!accessr.equals("")) {
externals
.add(
KeyValue
.newInstance(
"AccessMode:Access Right", input.getBestaccessright().getClassname()));
}
Optional
.ofNullable(input.getCollectedfrom())
@ -123,9 +135,16 @@ public class Mapper implements Serializable {
Optional<List<Field<String>>> ocont = Optional
.ofNullable(input.getContributor());
if (ocont.isPresent()) {
ocont.get().forEach(v -> externals.add(KeyValue.newInstance("Contributor", v.getValue())));
} else {
externals.add(KeyValue.newInstance("Contributor", ""));
ocont
.get()
.forEach(
v -> externals
.add(
KeyValue
.newInstance(
"Risis2_Attribution:Contributor",
v.getValue())));
// .forEach(v -> externals.add(KeyValue.newInstance("Contributor", v.getValue())));
}
Optional
@ -144,18 +163,18 @@ public class Mapper implements Serializable {
.add(
KeyValue
.newInstance(
"Embargo End Date", Optional
"AccessMode:Embargo End Date", Optional
.ofNullable(input.getEmbargoenddate())
.map(value -> value.getValue())
.orElse("")));
// .ifPresent(oStr -> externals.add(KeyValue.newInstance("Embargo End Date", oStr.getValue())));
final Set<String> formatSet = new HashSet<>();
Optional
.ofNullable(input.getFormat())
.ifPresent(value -> value.forEach(f -> formatSet.add(f.getValue())));
String id = input.getId().toLowerCase();
String id = input.getId();
id = id.substring(0, id.lastIndexOf(":") + 1) + "a" + id.substring(id.lastIndexOf(":") + 1);
out.setName(id.substring(id.indexOf('|') + 1).replace(":", "-"));
final Set<String> itSet = new HashSet<>();
@ -187,7 +206,6 @@ public class Mapper implements Serializable {
.ofNullable(input.getLanguage())
.map(value -> value.getClassname())
.orElse("")));
// .ifPresent(value -> externals.add(KeyValue.newInstance("Language", value.getClassname())));
List<StructuredProperty> iTitle = Optional
.ofNullable(input.getTitle())
@ -204,36 +222,53 @@ public class Mapper implements Serializable {
out.setTitle("");
}
final Set<String> pidSet = new HashSet<>();
Optional
.ofNullable(input.getPid())
.ifPresent(
value -> value
.forEach(
v -> pidSet.add(v.getQualifier().getClassid() + ":" + v.getValue())));
v -> {
if (v.getQualifier().getClassid().equalsIgnoreCase("DOI")) {
externals
.add(
KeyValue
.newInstance("Identity:PID", "https://www.doi.org/" + v.getValue()));
} else {
externals
.add(
KeyValue
.newInstance(
"Identity:PID",
v.getQualifier().getClassid() + ":" + v.getValue()));
}
}));
externals
.add(
KeyValue
.newInstance(
"Publication Date", Optional
"Risis2_Publishing:Publication Date", Optional
// "Publication Date", Optional
.ofNullable(input.getDateofacceptance())
.map(value -> value.getValue())
.orElse("")));
// .ifPresent(value -> externals.add(KeyValue.newInstance("Publication Date", value.getValue())));
externals
.add(
KeyValue
.newInstance(
"Publisher", Optional
.ofNullable(input.getPublisher())
.map(value -> value.getValue())
.orElse("")));
// .ifPresent(value -> externals.add(KeyValue.newInstance("Publisher", value.getValue())));
String publisher = Optional
.ofNullable(input.getPublisher())
.map(value -> value.getValue())
.orElse("");
Set<String> kwSet = new HashSet<>();
Set<String> sbjList = new HashSet<>();
if (!publisher.equals("")) {
groups.add(publisher.toLowerCase());
externals
.add(
KeyValue
.newInstance(
"Risis2_Publishing:Publisher", publisher));
}
Set<String> tagsSet = new HashSet<>();
Optional
.ofNullable(input.getSubject())
.ifPresent(
@ -241,39 +276,53 @@ public class Mapper implements Serializable {
.forEach(
s -> {
String classId = s.getQualifier().getClassid();
String prefix = "";
if (!classId.equals("keyword") &&
StringUtils.isNotEmpty(classId)) {
sbjList.add(classId + ":" + s.getValue());
} else {
kwSet.add(s.getValue());
prefix = classId + ".";
}
String tag = prefix + s.getValue();
tag = tagReplacements(tag);
tagsSet.add(tag);
}));
// if(sbjList.size() == 0){
// externals
// .add(
// KeyValue
// .newInstance("Subject(s)", ""));
// }else{
// sbjList.forEach(s -> externals
// .add(
// KeyValue
// .newInstance("Subject",s )));
// }
cfSet.remove("Unknown Repository");
externals.add(KeyValue.newInstance("Collected From", getListOfValues(cfSet)));
externals.add(KeyValue.newInstance("Risis2_Publishing:Collected From", getListOfValues(cfSet)));
hbSet.remove("Unknown Repository");
externals.add(KeyValue.newInstance("Hosted By", getListOfValues(hbSet)));
externals.add(KeyValue.newInstance("URL(s)", getListOfValues(urlSet)));
externals.add(KeyValue.newInstance("Risis2_Publishing:Hosted By", getListOfValues(hbSet)));
cfSet.forEach(cf -> groups.add(cf.toLowerCase()));
hbSet.forEach(hb -> groups.add(hb.toLowerCase()));
groups.forEach(g -> {
if (publishers.contains(g.trim())) {
groupList.add(Group.newInstance(null, g.trim()));
}
});
out.setGroups(groupList);
urlSet.stream().forEach(url -> externals.add(KeyValue.newInstance("Identity:URL", url)));
externals.add(KeyValue.newInstance("Country", getListOfValues(countrySet)));
externals.add(KeyValue.newInstance("Format", getListOfValues(formatSet)));
externals.add(KeyValue.newInstance("PID(s)", getListOfValues(pidSet)));
externals.add(KeyValue.newInstance("Resource Type", getListOfValues(itSet)));
externals.add(KeyValue.newInstance("Keyword(s)", getListOfValues(kwSet)));
externals.add(KeyValue.newInstance("subject(s)", getListOfValues(sbjList)));
List<Tag> tags = new ArrayList<>();
List<String> kws = new ArrayList<>();
tagsSet.forEach(tag -> {
if (tag.endsWith(",") || tag.endsWith(";") || tag.endsWith(".")) {
tag = tag.substring(0, tag.length() - 1);
}
if (tag.matches("^[a-zA-Z0-9_. -]*$") && tag.length() > 1 && tag.length() < 101) {
tags.add(Tag.newInstance(tag));
} else {
kws.add(tag);
}
});
out.setTags(tags);
if (kws.size() > 0) {
kws.forEach(k -> externals.add(KeyValue.newInstance("keyword", k)));
}
out.setExtras(externals);
}
@ -281,6 +330,41 @@ public class Mapper implements Serializable {
return out;
}
// Sanitises a raw subject/keyword into a catalogue-safe tag: square brackets
// are dropped, '&' becomes " and ", parentheses and slashes turn into '_',
// and surrounding whitespace is stripped. Callers validate the result against
// "^[a-zA-Z0-9_. -]*$" before using it as a tag.
public static String tagReplacements(String tag) {
	String sanitized = tag
		.replace("[", "")
		.replace("]", "")
		.replace("&", " and ")
		.replace("(", "_")
		.replace(")", "_")
		.replace("/", "_")
		.replace("\\", "_");
	return sanitized.trim();
}
// Builds the display string for an author: the full name, optionally followed
// by ", <pid>" where <pid> is the first pid whose qualifier classid is
// "orcid" (case-insensitive), when any is present.
private static String getAuthor(Author v) {
	String author = v.getFullname();
	List<StructuredProperty> pids = v.getPid();
	if (pids != null) {
		Optional<String> orcid = pids
			.stream()
			.filter(p -> "orcid".equalsIgnoreCase(p.getQualifier().getClassid()))
			.map(StructuredProperty::getValue)
			.findFirst();
		if (orcid.isPresent()) {
			author += ", " + orcid.get();
		}
	}
	return author;
}
private static String getListOfValues(Set<String> cfSet) {
StringWriter sw = new StringWriter();
cfSet.forEach(value -> sw.append(value + "; "));

View File

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.oa.graph.dump.gcat;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import org.apache.commons.io.IOUtils;
import java.io.Serializable;
/**
 * Entry point meant to push dumped catalogue entries to the gCat catalogue.
 * NOTE(review): work in progress — the parameters parsed below are read but
 * never used and no catalogue call is performed yet.
 */
public class SendToCatalogue implements Serializable {
public static void main(final String[] args) throws Exception {
// Load the CLI parameter specification from the classpath.
// NOTE(review): the spec lives under /eu/dnetlib/dhp/blacklist/ — this looks
// like a copy/paste from another module; confirm it should not live under
// .../oa/graph/dump/gcat/ instead.
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
SendToCatalogue.class
.getResourceAsStream(
"/eu/dnetlib/dhp/blacklist/catalogue_parameters.json")));
parser.parseArgument(args);
// Parsed but currently unused — presumably inputs for the future publish
// step (token for the gCat API, HDFS location of the dumped entries).
final String access_token = parser.get("accessToken");
final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("hdfsNameNode");
}
}

View File

@ -25,9 +25,9 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.SparkDumpCommunityProducts;
import eu.dnetlib.dhp.oa.graph.dump.gcat.Mapper;
import eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue;
//@ExtendWith(MockitoExtension.class)
public class DumpJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -83,8 +83,6 @@ public class DumpJobTest {
"-communityName", "risis"
});
// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> tmp = sc
@ -95,120 +93,23 @@ public class DumpJobTest {
.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));
Assertions.assertEquals(3, verificationDataset.count());
// verificationDataset.show(false);
verificationDataset.show(false);
// verificationDataset.select("instance.type").show(false);
//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
}
// @Test
// public void testPublication() throws Exception {
//
// final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
// .getPath();
//
// SparkDumpCommunityProducts.main(new String[] {
// "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
// "-isSparkSessionManaged", Boolean.FALSE.toString(),
// "-outputPath", workingDir.toString() + "/result",
// "-sourcePath", sourcePath,
// "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
// "-communityMap", new Gson().toJson(map)
// });
//
//// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
// .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// Assertions.assertEquals(76, verificationDataset.count());
// verificationDataset.show(false);
//
// Assertions.assertEquals(76, verificationDataset.filter("type = 'publication'").count());
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// }
//
// @Test
// public void testSoftware() throws Exception {
//
// final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
// .getPath();
//
// SparkDumpCommunityProducts.main(new String[] {
// "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
// "-isSparkSessionManaged", Boolean.FALSE.toString(),
// "-outputPath", workingDir.toString() + "/result",
// "-sourcePath", sourcePath,
// "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
// "-communityMap", new Gson().toJson(map)
// });
//
//// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
// .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// Assertions.assertEquals(6, verificationDataset.count());
//
// Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
// verificationDataset.show(false);
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// }
//
// @Test
// public void testORP() throws Exception {
//
// final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
// .getPath();
//
// SparkDumpCommunityProducts.main(new String[] {
// "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
// "-isSparkSessionManaged", Boolean.FALSE.toString(),
// "-outputPath", workingDir.toString() + "/result",
// "-sourcePath", sourcePath,
// "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
// "-communityMap", new Gson().toJson(map)
// });
//
//// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
// .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// Assertions.assertEquals(3, verificationDataset.count());
//
// Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
// verificationDataset.show(false);
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// }
@Test
public void remove() {
// Ad-hoc check of tag sanitisation on a sample JEL classification string:
// every character below (letters, digits, spaces, '-', '_', '.') is inside
// the whitelist Mapper uses for tags, so the matches() assertion holds.
String tag = "F - International Economics_F.F2 - International Factor Movements and International Business_F.F2.F22 - International Migration JEL";
// NOTE(review): 'tmp' is never used — leftover from a split experiment.
String[] tmp = tag.split("[,;.]");
System.out.println(Mapper.tagReplacements(tag));
Assertions.assertTrue(tag.matches("^[a-zA-Z0-9_. -]*$"));
System.out.println(tag.length());
}
}

View File

@ -60,7 +60,7 @@ public class GCatAPIClientTest {
// and '_'.
// You can validate your name using the regular expression : ^[a-z0-9_\\-]{2,100}$
String json = IOUtils
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_dat.json"));
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_test.json"));
System.out.println("Creating item...");
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
System.out.println("item created, now listing...");
@ -69,20 +69,44 @@ public class GCatAPIClientTest {
}
@Test
public void bulkPublish() throws IOException, URISyntaxException {
public void bulkPublishORP() throws IOException, URISyntaxException {
BufferedReader reader = new BufferedReader(new FileReader(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_publications")
.getPath()));
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_other.json")
.getPath()));
String line;
while((line = reader.readLine())!= null){
while ((line = reader.readLine()) != null) {
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
}
}
@Test
public void bulkPublishDATS() throws IOException, URISyntaxException {
	// Publishes each dataset entry (one JSON document per line) from the
	// gcat_dat.json fixture to the catalogue, expecting HTTP 201 per record.
	// Fixed: the reader was previously leaked; try-with-resources closes it
	// even when an assertion or publish call fails.
	try (BufferedReader reader = new BufferedReader(new FileReader(getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_dat.json")
		.getPath()))) {
		String line;
		while ((line = reader.readLine()) != null) {
			Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
		}
	}
}
@Test
public void bulkPublishPUBS() throws IOException, URISyntaxException {
	// Publishes each publication entry (one JSON document per line) from the
	// gcat_publications fixture to the catalogue, expecting HTTP 201 each.
	// Fixed: the reader was previously leaked; try-with-resources closes it
	// even when an assertion or publish call fails.
	try (BufferedReader reader = new BufferedReader(new FileReader(getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_publications")
		.getPath()))) {
		String line;
		while ((line = reader.readLine()) != null) {
			Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
		}
	}
}
@Test
public void purgeItem() throws IOException, URISyntaxException {
String objidentifier = "datacite____--6b1e3a2fa60ed8c27317a66d6357f795";
String objidentifier = "fake";
Assertions.assertTrue(client.purge(objidentifier));
System.out.println("item purged");
@ -96,7 +120,7 @@ public class GCatAPIClientTest {
@Test
public void testPurgeAllEmptyCat() throws IOException, URISyntaxException {
Assertions.assertEquals(0, client.purgeAll());
Assertions.assertEquals(179, client.purgeAll());
}
@Test

View File

@ -1 +1,2 @@
{"extras":[{"key":"Result Type","value":"dataset"},{"key":"Author","value":"Philippe Laredo"},{"key":"Access Right","value":"Open Access"},{"key":"Contributor","value":"European Commission"},{"key":"Embargo End Date","value":""},{"key":"Language","value":"English"},{"key":"Publication Date","value":"2019-02-08"},{"key":"Publisher","value":"Zenodo"},{"key":"Collected From","value":"ZENODO; Datacite; figshare"},{"key":"Hosted By","value":"Zenodo; ZENODO; figshare"},{"key":"URL(s)","value":"http://dx.doi.org/10.5281/zenodo.2560117; https://zenodo.org/record/2560117; http://dx.doi.org/10.5281/zenodo.2560116; https://figshare.com/articles/Introduction_of_RISIS_project_by_Philippe_Laredo/7699286"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"PID(s)","value":"doi:10.5281/zenodo.2560116; doi:10.5281/zenodo.2560117"},{"key":"Resource Type","value":"Audiovisual"},{"key":"Keyword(s)","value":""},{"key":"subject(s)","value":""}],"license_id":"notspecified","name":"dedup_wf_001--10160b3eafcedeb0a384fc400fe1c3fa","notes":"<p>Introduction of RISIS project by Philippe Laredo</p>","title":"Introduction of RISIS project by Philippe Laredo","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::10160b3eafcedeb0a384fc400fe1c3fa","version":"None"}
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Laredo, Philippe, 0000-0002-5014-9132"},{"key":"AccessMode:Access Right","value":"Open Access"},{"key":"Risis2_Attribution:Contributor","value":"European Commission"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"English"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.2560116"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.2560117"},{"key":"Risis2_Publishing:Publication Date","value":"2019-02-08"},{"key":"Risis2_Publishing:Publisher","value":"Zenodo"},{"key":"Risis2_Publishing:Collected From","value":"ZENODO; Datacite; figshare"},{"key":"Risis2_Publishing:Hosted By","value":"Zenodo; ZENODO; figshare"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.2560117"},{"key":"Identity:URL","value":"https://zenodo.org/record/2560117"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.2560116"},{"key":"Identity:URL","value":"https://figshare.com/articles/Introduction_of_RISIS_project_by_Philippe_Laredo/7699286"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Audiovisual"}],"groups":[{"name":"open"},{"name":"zenodo"},{"name":"figshare"}],"license_id":"notspecified","name":"dedup_wf_001--a10160b3eafcedeb0a384fc400fe1c3fa","notes":"<p>Introduction of RISIS project by Philippe Laredo</p>","tags":[],"title":"Introduction of RISIS project by Philippe Laredo","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::10160b3eafcedeb0a384fc400fe1c3fa","version":"None"}
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Lepori, Benedetto"},{"key":"Risis2_Attribution:Author","value":"Guerini, Massimilano"},{"key":"AccessMode:Access Right","value":"Open Access"},{"key":"Risis2_Attribution:Contributor","value":"European Commission"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"English"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.3752861"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.3752860"},{"key":"Risis2_Publishing:Publication Date","value":"2020-04-15"},{"key":"Risis2_Publishing:Publisher","value":"Zenodo"},{"key":"Risis2_Publishing:Collected From","value":"Zenodo; ZENODO; Datacite"},{"key":"Risis2_Publishing:Hosted By","value":"Zenodo; ZENODO"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.3752861"},{"key":"Identity:URL","value":"https://zenodo.org/record/3752861"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.3752860"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Dataset"}],"groups":[{"name":"open"},{"name":"zenodo"}],"license_id":"notspecified","name":"dedup_wf_001--ac4634a42d4b98e594e0796a41b47ec61","notes":"<p>This file provides the correspondence table between EUROSTAT NUTS3 classification and the adapted regional classification used by the RISIS-KNOWMAK project. This regional classification fits the structure of knowledge production in Europe and addresses some knowm problems of the NUTS3 classification, such as the treatment of large agglomerations, while remaining fully compatible with the EUROSTAT NUTS regional classification. 
This compatibility allows combining all KNOWMAK data with regional statistics (at NUTS3 level, 2016 edition) from EUROSTAT.</p>\n\n<p>More precisely, the classification includes EUROSTAT metropolitan regions (based on the aggregation of NUTS3-level regions) and NUTS2 regions for the remaining areas; further, a few additional centers for knowledge production, like Oxford and Leuven, have been singled out at NUTS3 level. The resulting classification is therefore more fine-grained than NUTS2 in the areas with sizeable knowledge production, but at the same time recognizes the central role of metropolitan areas in knowledge production. While remaining compatible with NUTS, the classification allows addressing two well-known shortcomings: a) the fact that some large cities are split between NUTS regions (London) and b) the fact that NUTS3 classification in some countries includes many very small regions, as in the case of Germany</p>","tags":[],"title":"RISIS-KNOWMAK NUTS adapted classification","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::c4634a42d4b98e594e0796a41b47ec61","version":""}

View File

@ -1,97 +0,0 @@
{
"extras": [
{
"key": "Programming Language",
"value": "UNKNOWN"
},
{
"key": "Result Type",
"value": "software"
},
{
"key": "Author",
"value": "Regev, Mor"
},
{
"key": "Author",
"value": "Simony, Erez"
},
{
"key": "Author",
"value": "Lee, Katherine"
},
{
"key": "Author",
"value": "Tan, Kean Ming"
},
{
"key": "Author",
"value": "Chen, Janice"
},
{
"key": "Author",
"value": "Hasson, Uri"
},
{
"key": "Access Right",
"value": "not available"
},
{
"key": "Contributor",
"value": "Regev, Mor"
},
{
"key": "Embargo End Date",
"value": ""
},
{
"key": "Language",
"value": "en-us"
},
{
"key": "Publication Date",
"value": "2018-01-01"
},
{
"key": "Publisher",
"value": "Code Ocean"
},
{
"key": "Collected From",
"value": "Datacite"
},
{
"key": "Hosted By",
"value": "Code Ocean"
},
{
"key": "URL",
"value": "http://dx.doi.org/10.24433/co.12957bc5-fa2b-488f-ae72-52e3fe362b5c; fake; https://codeocean.com/2018/10/30/intersubject-functional-correlation-lpar-isfc-rpar-as-a-function-of-attention"
},
{
"key": "Country",
"value": ""
},
{
"key": "Format",
"value": ""
},
{
"key": "PID",
"value": "doi:10.24433/co.12957bc5-fa2b-488f-ae72-52e3fe362b5c"
},
{
"key": "Resource Type",
"value": "Software"
},
{
"key": "Keyword",
"value": "neuroscience; attention; Capsule; Biology; language; fmri"
}
],
"license_id": "notspecified",
"name": "datacite____--6b1e3a2fa60ed8c27317a66d6357f795",
"notes": "This capsule demonstrates the inter-subject functional correlation (ISFC) analysis described in \"Propagation of information along the cortical hierarchy as a function of attention while reading and listening to stories \" by Regev, Simony, Lee, Tan, Chen and Hasson.",
"title": "Intersubject functional correlation (ISFC) as a function of attention",
"url": "https://beta.risis.openaire.eu/search/software?softwareId=datacite____::6b1e3a2fa60ed8c27317a66d6357f795"
}

View File

@ -0,0 +1,170 @@
{
"extras": [
{
"key": "Journal",
"value": "International Journal of Technology Management, 80, null"
},
{
"key": "system:type",
"value": "publication"
},
{
"key": "Author",
"value": "Laurens, Patricia"
},
{
"key": "Author",
"value": "Le Bas, Christian"
},
{
"key": "Author",
"value": "Schoen, Antoine"
},
{
"key": "AccessMode:Access Right",
"value": "Open Access"
},
{
"key": "Contributor",
"value": "Laboratoire Interdisciplinaire Sciences, Innovations, Sociétés (LISIS) ; Institut National de la Recherche Agronomique (INRA)-Université Paris-Est Marne-la-Vallée (UPEM)-ESIEE Paris-Centre National de la Recherche Scientifique (CNRS)"
},
{
"key": "Contributor",
"value": "ESDES - École de management de Lyon ; Université Catholique de Lyon"
},
{
"key": "Contributor",
"value": "This work was supported by RISIS-funded by the European Union\u2019s Horizon2020 Research and innovation programme under grant number 313082 and 824091"
},
{
"key": "Contributor",
"value": "European Project: 313082,EC:FP7:INFRA,FP7-INFRASTRUCTURES-2012-1,RISIS(2014)"
},
{
"key": "Contributor",
"value": "European Project: 824091,H2020-EU.1.4.1.2,H2020-INFRAIA-2018-1,RISIS2(2019)"
},
{
"key": "Contributor",
"value": "Laboratoire Interdisciplinaire Sciences, Innovations, Société\n (\nLISIS\n)\n\n ; \nInstitut National de la Recherche Agronomique\n (\nINRA\n)\n-Université Paris-Est Marne-la-Vallée\n (\nUPEM\n)\n-ESIEE Paris-Centre National de la Recherche Scientifique\n (\nCNRS\n)"
},
{
"key": "Contributor",
"value": "ESDES - École de management de Lyon\n ; \nUniversité Catholique de Lyon"
},
{
"key": "Contributor",
"value": "Laboratoire Interdisciplinaire Sciences, Innovations, Sociétés (LISIS) ; Centre National de la Recherche Scientifique (CNRS)-ESIEE Paris-Université Paris-Est Marne-la-Vallée (UPEM)-Institut National de la Recherche Agronomique (INRA)"
},
{
"key": "AccessMode:Embargo End Date",
"value": ""
},
{
"key": "Language",
"value": "Undetermined"
},
{
"key": "Identity:PID",
"value": "https://www.doi.org/10.1504/ijtm.2019.100283"
},
{
"key": "Identity:PID",
"value": "https://www.doi.org/10.1504/ijtm.2019.10022013"
},
{
"key": "Publication Date",
"value": "2019-01-01"
},
{
"key": "Publisher",
"value": "Inderscience Publishers"
},
{
"key": "Collected From",
"value": "UnpayWall; INRIA a CCSD electronic archive server; HAL Descartes; HAL - UPEC / UPEM; Crossref; Hyper Article en Ligne; Microsoft Academic Graph; Hyper Article en Ligne - Sciences de l'Homme et de la Société"
},
{
"key": "Hosted By",
"value": "INRIA a CCSD electronic archive server; HAL Descartes; HAL - UPEC / UPEM; Hyper Article en Ligne; Hyper Article en Ligne - Sciences de l'Homme et de la Société; International Journal of Technology Management"
},
{
"key": "Identity:URL",
"value": "https://hal.archives-ouvertes.fr/hal-01725229"
},
{
"key": "Identity:URL",
"value": "https://hal.archives-ouvertes.fr/hal-01725229/document"
},
{
"key": "Identity:URL",
"value": "https://academic.microsoft.com/#/detail/2791245388"
},
{
"key": "Identity:URL",
"value": "http://dx.doi.org/10.1504/ijtm.2019.10022013"
},
{
"key": "Identity:URL",
"value": "http://www.inderscienceonline.com/doi/full/10.1504/IJTM.2019.100283"
},
{
"key": "Identity:URL",
"value": "https://hal.archives-ouvertes.fr/hal-01725229/file/IP%20internationalisation_2017.pdf"
},
{
"key": "Identity:URL",
"value": "http://dx.doi.org/10.1504/ijtm.2019.100283"
},
{
"key": "Identity:URL",
"value": "http://www.inderscienceonline.com/doi/full/10.1504/IJTM.2019.10022013"
},
{
"key": "Country",
"value": "France"
},
{
"key": "Format",
"value": ""
},
{
"key": "Resource Type",
"value": "Article"
},
{
"key": "Keyword",
"value": "Manufacturing_L.L6.L65 - Chemicals \u2022 Rubber \u2022 Drugs \u2022 Biotechnology JEL"
},
{
"key": "Keyword",
"value": "O - Economic Development, Innovation, Technological Change, and Growth_O.O3 - Innovation \u2022 Research and Development \u2022 Technological Change \u2022 Intellectual Property Rights_O.O3.O34 - Intellectual Property and Intellectual Capital JEL"
}
],
"license_id": "notspecified",
"name": "dedup_wf_001--48fee33ea4df43e302f6957209893f81",
"notes": "International audience; The paper deals with the determinants of worldwide IP coverage of patented inventions in large pharmaceutical firms. We support the core idea that the internationalisation of firm R&D and an economic presence in a foreign country are positive key factors which explains global IP coverage. For the global pharmaceutical industry, we estimate probit models on the probability that a patent will be expanded worldwide. We retain two categories of worldwide patent: the well-known triadic patent and the new triadic one (triadic + China + Korea). The data set encompasses the 17,633 priority patents applied for by 76 enterprises from several countries over the period 2003-2005. One important finding is that patenting in Japan sets up an important barrier, giving Japanese firms an advantage when triadic patenting is considered. For European and US firms, our estimation results confirm the idea that the level of firm R&D internationalisation is a significant explanatory factor in international IP coverage, together with control variables. We highlight an inverted U-shaped relationship between these two variables. The hypothesis related to a firm economic presence is also verified.",
"tags": [
{"name": "Economics"},
{"name": "Industrial relations"},
{"name": "Law"},
{"name": "business"},
{"name": "F - International Economics_F.F2 - International Factor Movements and International Business_F.F2.F22 - International Migration JEL"},
{"name": "Strategy and Management"},
{"name": "Probit model"},
{"name": "SHS.GESTION Humanities and Social Sciences_Business administration"},
{"name": "General Engineering"},
{"name": "Marketing"},
{"name": "Control variable"},
{"name": "Industrial organization"},
{"name": "Computer Science Applications"},
{"name": "China"},
{"name": "Internationalization"},
{"name": "business.industry"},
{"name": "Pharmaceutical industry"},
{"name": "Foreign country"},
{"name": "Firm strategy"}
],
"title": "Worldwide IP coverage of patented inventions in large pharma firms: to what extent do the internationalisation of R&D and firm strategy matter",
"url": "https://beta.risis.openaire.eu/search/publication?articleId=dedup_wf_001::48fee33ea4df43e302f6957209893f81"
}