This commit is contained in:
Alessia Bardi 2020-07-02 15:50:18 +02:00
commit 0c9539a301
19 changed files with 884 additions and 260 deletions

View File

@ -9,13 +9,31 @@ import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
public class CatalogueEntry implements Serializable {
private String name; // openaire id withouut :: substitute with $$
private String licence_id; // default "notspecified",
private String license_id; // default "notspecified",
private String title; // title.maintitle
private String notes; // description.value (the first description
private String url; // the url of the resource in the openaire dashboard
private String version; // valid for datasets
private List<Tag> tags; // subject and keywords
private List<Group> groups; // access and publishers
private List<KeyValue> extras;
public List<Group> getGroups() {
return groups;
}
public void setGroups(List<Group> groups) {
this.groups = groups;
}
public List<Tag> getTags() {
return tags;
}
public void setTags(List<Tag> tags) {
this.tags = tags;
}
public String getVersion() {
return version;
}
@ -32,12 +50,12 @@ public class CatalogueEntry implements Serializable {
this.name = name;
}
public String getLicence_id() {
return licence_id;
public String getLicense_id() {
return license_id;
}
public void setLicence_id(String licence_id) {
this.licence_id = licence_id;
public void setLicense_id(String license_id) {
this.license_id = license_id;
}
public String getTitle() {

View File

@ -0,0 +1,37 @@
package eu.dnetlib.dhp.schema.dump.gcat;
import java.io.Serializable;
/**
 * Serializable bean representing a gCat catalogue group (e.g. an access-mode
 * or publisher group a catalogue entry belongs to).
 */
public class Group implements Serializable {

	private String id;
	private String name;

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	/**
	 * Static factory: builds a Group, assigning only the non-null arguments.
	 */
	public static Group newInstance(String id, String name) {
		final Group group = new Group();
		if (id != null) {
			group.setId(id);
		}
		if (name != null) {
			group.setName(name);
		}
		return group;
	}
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.schema.dump.gcat;
import java.io.Serializable;
import com.fasterxml.jackson.core.SerializableString;
/**
 * Serializable bean representing a gCat catalogue tag (a subject or keyword
 * attached to a catalogue entry).
 */
public class Tag implements Serializable {

	private String name;

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	/** Static factory wrapping the given tag name (may be null). */
	public static Tag newInstance(String n) {
		final Tag tag = new Tag();
		tag.setName(n);
		return tag;
	}
}

View File

@ -11,6 +11,7 @@ public class Constants {
public static String DATASET_URL = "https://beta.risis.openaire.eu/search/dataset?datasetId=";
public static String SOFTWARE_URL = "https://beta.risis.openaire.eu/search/software?softwareId=";
public static String ORP_URL = "https://beta.risis.openaire.eu/search/other?orpId=";
public static String DEFAULT_LICENCE_ID = "notspecified";
public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
public static final Map<String, String> gcatCatalogue = Maps.newHashMap();
@ -34,11 +35,11 @@ public class Constants {
static {
gcatCatalogue.put("OPEN", "OPEN");
accessRightsCoarMap.put("RESTRICTED", "RESTRICTED");
accessRightsCoarMap.put("OPEN SOURCE", "OPEN");
accessRightsCoarMap.put("CLOSED", "CLOSED");
accessRightsCoarMap.put("EMBARGO", "EMBARGO");
accessRightsCoarMap.put("UNKNOWN", "UNKNOWN");
accessRightsCoarMap.put("OTHER", "UNKNOWN");
gcatCatalogue.put("RESTRICTED", "RESTRICTED");
gcatCatalogue.put("OPEN SOURCE", "OPEN");
gcatCatalogue.put("CLOSED", "CLOSED");
gcatCatalogue.put("EMBARGO", "EMBARGO");
gcatCatalogue.put("UNKNOWN", "UNKNOWN");
gcatCatalogue.put("OTHER", "UNKNOWN");
}
}

View File

@ -14,6 +14,7 @@ import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
@ -21,6 +22,8 @@ import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
@ -63,11 +66,13 @@ public class GCatAPIClient {
StringEntity entity = new StringEntity(jsonMetadata, StandardCharsets.UTF_8);
post.setEntity(entity);
HttpResponse response = client.execute(post);
if (log.isDebugEnabled()) {
log.debug(response.getStatusLine());
System.out.println(response.getStatusLine());
log.debug(IOUtils.toString(response.getEntity().getContent()));
}
return response.getStatusLine().getStatusCode();
}
}

View File

@ -2,118 +2,182 @@
package eu.dnetlib.dhp.oa.graph.dump.gcat;
import java.io.Serializable;
import java.io.StringWriter;
import java.security.Key;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.avro.generic.GenericData;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry;
import eu.dnetlib.dhp.schema.dump.gcat.Group;
import eu.dnetlib.dhp.schema.dump.gcat.Tag;
import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class Mapper implements Serializable {
private static final List<String> publishers = Arrays
.asList("zenodo", "hal", "figshare", "digital-csic", "dans", "datacite");
private static final List<String> access = Arrays.asList("open", "closed", "embargoed", "restricted");
public static <I extends eu.dnetlib.dhp.schema.oaf.Result> CatalogueEntry map(I input) {
final CatalogueEntry out = new CatalogueEntry();
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
List<KeyValue> externals = new ArrayList<>();
Set<String> urlSet = new HashSet<>();
Set<String> cfSet = new HashSet<>();
Set<String> hbSet = new HashSet<>();
Set<String> countrySet = new HashSet<>();
Set<String> groups = new HashSet<>();
List<Group> groupList = new ArrayList<>();
if (ort.isPresent()) {
switch (ort.get().getClassid()) {
case "publication":
Optional<Journal> journal = Optional
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
if (journal.isPresent()) {
Journal j = journal.get();
KeyValue kv = new KeyValue();
kv.setKey("journal");
kv.setValue(j.getName() + ", " + j.getVol() + ", " + j.getIss());
externals.add(kv);
Optional<Journal> oJournal = Optional
.ofNullable(((Publication) input).getJournal());
if (oJournal.isPresent()) {
Journal value = oJournal.get();
externals
.add(
KeyValue
.newInstance(
"Risis2_Publishing:Journal",
// "Journal",
value.getName() + ", " + value.getVol() + ", " + value.getIss()));
}
out.setUrl(Constants.PUBLICATION_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("system:type", "publication"));
break;
case "dataset":
eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
out
.setVersion(
Optional
.ofNullable(id.getVersion())
.map(v -> v.getValue())
.orElse(""));
out.setUrl(Constants.DATASET_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("system:type", "dataset"));
break;
case "software":
eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
Optional
.ofNullable(is.getCodeRepositoryUrl())
.ifPresent(value -> externals.add(KeyValue.newInstance("url", value.getValue())));
.ifPresent(value -> urlSet.add(value.getValue()));
Optional
.ofNullable(is.getDocumentationUrl())
.ifPresent(
value -> value
.stream()
.map(v -> externals.add(KeyValue.newInstance("url", v.getValue()))));
.ifPresent(value -> value.forEach(v -> urlSet.add(v.getValue())));
Optional
.ofNullable(is.getProgrammingLanguage())
.ifPresent(
value -> externals.add(KeyValue.newInstance("programming language", value.getClassname())));
externals
.add(
KeyValue
.newInstance(
"Programming Language", Optional
.ofNullable(is.getProgrammingLanguage())
.map(v -> v.getClassname())
.orElse("")));
// .ifPresent(
// value -> externals.add(KeyValue.newInstance("Programming Language", value.getClassname())));
out.setUrl(Constants.SOFTWARE_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("system:type", "software"));
break;
case "other":
out.setUrl(Constants.ORP_URL + input.getId().substring(3));
externals.add(KeyValue.newInstance("system:type", "other"));
break;
}
Optional
.ofNullable(input.getAuthor())
.ifPresent(
value -> value
.forEach(v -> externals.add(KeyValue.newInstance("author", v.getFullname()))));
out.setLicense_id(Constants.DEFAULT_LICENCE_ID);
Optional
Optional<List<Author>> oauth = Optional
.ofNullable(input.getAuthor());
List<String> authList = new ArrayList<>();
if (oauth.isPresent()) {
authList = oauth.get().stream().map(v -> getAuthor(v)).collect(Collectors.toList());
}
if (authList.size() > 0) {
authList.forEach(a -> externals.add(KeyValue.newInstance("Risis2_Attribution:Author", a)));
// authList.forEach(a -> externals.add(KeyValue.newInstance("Author", a)));
}
String accessr = Optional
.ofNullable(input.getBestaccessright())
.ifPresent(
value -> externals
.add(KeyValue.newInstance("access right", Constants.gcatCatalogue.get(value.getClassid()))));
.map(
value -> value.getClassid())
.orElse("");
if (access.contains(accessr.toLowerCase())) {
groupList.add(Group.newInstance(null, accessr.toLowerCase().trim()));
}
if (!accessr.equals("")) {
externals
.add(
KeyValue
.newInstance(
"AccessMode:Access Right", input.getBestaccessright().getClassname()));
}
Optional
.ofNullable(input.getCollectedfrom())
.ifPresent(
value -> value
.forEach(v -> externals.add(KeyValue.newInstance("collected from", v.getValue()))));
.forEach(v -> cfSet.add(v.getValue())));
Optional
.ofNullable(input.getContributor())
.ifPresent(
value -> value
.forEach(v -> externals.add(KeyValue.newInstance("contributor", v.getValue()))));
Optional<List<Field<String>>> ocont = Optional
.ofNullable(input.getContributor());
if (ocont.isPresent()) {
ocont
.get()
.forEach(
v -> externals
.add(
KeyValue
.newInstance(
"Risis2_Attribution:Contributor",
v.getValue())));
// .forEach(v -> externals.add(KeyValue.newInstance("Contributor", v.getValue())));
}
Optional
.ofNullable(input.getCountry())
.ifPresent(
value -> value
.forEach(v -> externals.add(KeyValue.newInstance("country", v.getClassname()))));
.forEach(v -> countrySet.add(v.getClassname())));
final List<String> descriptionList = new ArrayList<>();
Optional
.ofNullable(input.getDescription())
.ifPresent(value -> {
Iterator<Field<String>> it = value.iterator();
out.setName(it.next().getValue());
it.forEachRemaining(v -> externals.add(KeyValue.newInstance("description", v.getValue())));
});
.ifPresent(value ->
Optional
.ofNullable(input.getEmbargoenddate())
.ifPresent(oStr -> externals.add(KeyValue.newInstance("embargo end date", oStr.getValue())));
getDescription(out, externals, value));
final List<String> formatList = new ArrayList<>();
externals
.add(
KeyValue
.newInstance(
"AccessMode:Embargo End Date", Optional
.ofNullable(input.getEmbargoenddate())
.map(value -> value.getValue())
.orElse("")));
final Set<String> formatSet = new HashSet<>();
Optional
.ofNullable(input.getFormat())
.ifPresent(value -> value.forEach(f -> formatList.add(f.getValue())));
.ifPresent(value -> value.forEach(f -> formatSet.add(f.getValue())));
out.setName(input.getId().replace(":", "$"));
String id = input.getId();
id = id.substring(0, id.lastIndexOf(":") + 1) + "a" + id.substring(id.lastIndexOf(":") + 1);
out.setName(id.substring(id.indexOf('|') + 1).replace(":", "-"));
final Set<String> itSet = new HashSet<>();
Optional
.ofNullable(input.getInstance())
.ifPresent(
@ -122,19 +186,26 @@ public class Mapper implements Serializable {
Optional
.ofNullable(v.getHostedby())
.ifPresent(hb -> externals.add(KeyValue.newInstance("hosted by", hb.getValue())));
.ifPresent(hb -> hbSet.add(hb.getValue()));
final HashSet<String> urlSet = new HashSet<>();
Optional
.ofNullable(v.getUrl())
.ifPresent(u -> u.forEach(url -> urlSet.add(url)));
urlSet.forEach(url -> externals.add(KeyValue.newInstance("url", url)));
Optional
.ofNullable(v.getInstancetype())
.ifPresent(it -> itSet.add(it.getClassname()));
}));
Optional
.ofNullable(input.getLanguage())
.ifPresent(value -> externals.add(KeyValue.newInstance("language", value.getClassname())));
externals
.add(
KeyValue
.newInstance(
"Language", Optional
.ofNullable(input.getLanguage())
.map(value -> value.getClassname())
.orElse("")));
List<StructuredProperty> iTitle = Optional
.ofNullable(input.getTitle())
@ -147,6 +218,8 @@ public class Mapper implements Serializable {
if (iTitle.size() > 0) {
out.setTitle(iTitle.get(0).getValue());
} else {
out.setTitle("");
}
Optional
@ -154,30 +227,102 @@ public class Mapper implements Serializable {
.ifPresent(
value -> value
.forEach(
v -> externals
.add(KeyValue.newInstance("pid", v.getQualifier().getClassid() + ":" + v.getValue()))));
v -> {
if (v.getQualifier().getClassid().equalsIgnoreCase("DOI")) {
externals
.add(
KeyValue
.newInstance("Identity:PID", "https://www.doi.org/" + v.getValue()));
} else {
externals
.add(
KeyValue
.newInstance(
"Identity:PID",
v.getQualifier().getClassid() + ":" + v.getValue()));
}
Optional
.ofNullable(input.getDateofacceptance())
.ifPresent(value -> externals.add(KeyValue.newInstance("publication date", value.getValue())));
}));
Optional
externals
.add(
KeyValue
.newInstance(
"Risis2_Publishing:Publication Date", Optional
// "Publication Date", Optional
.ofNullable(input.getDateofacceptance())
.map(value -> value.getValue())
.orElse("")));
String publisher = Optional
.ofNullable(input.getPublisher())
.ifPresent(value -> externals.add(KeyValue.newInstance("publisher", value.getValue())));
.map(value -> value.getValue())
.orElse("");
List<ControlledField> subjectList = new ArrayList<>();
if (!publisher.equals("")) {
groups.add(publisher.toLowerCase().replace(".", "-"));
externals
.add(
KeyValue
.newInstance(
"Risis2_Publishing:Publisher", publisher));
}
Set<String> tagsSet = new HashSet<>();
Optional
.ofNullable(input.getSubject())
.ifPresent(
value -> value
.stream()
.forEach(
s -> externals
.add(
KeyValue
.newInstance("subject", s.getQualifier().getClassid() + ":" + s.getValue()))));
externals.add(KeyValue.newInstance("resource type", input.getResourcetype().getClassid()));
s -> {
String classId = s.getQualifier().getClassid();
String prefix = "";
if (!classId.equals("keyword") &&
StringUtils.isNotEmpty(classId)) {
prefix = classId + ".";
}
String tag = prefix + s.getValue();
tag = tagReplacements(tag);
tagsSet.add(tag);
}));
cfSet.remove("Unknown Repository");
externals.add(KeyValue.newInstance("Risis2_Publishing:Collected From", getListOfValues(cfSet)));
hbSet.remove("Unknown Repository");
externals.add(KeyValue.newInstance("Risis2_Publishing:Hosted By", getListOfValues(hbSet)));
cfSet.forEach(cf -> groups.add(cf.toLowerCase().replace(".", "-")));
hbSet.forEach(hb -> groups.add(hb.toLowerCase().replace(".", "-")));
groups.forEach(g -> {
if (publishers.contains(g.trim())) {
groupList.add(Group.newInstance(null, g.trim()));
}
});
out.setGroups(groupList);
urlSet.stream().forEach(url -> externals.add(KeyValue.newInstance("Identity:URL", url)));
externals.add(KeyValue.newInstance("Country", getListOfValues(countrySet)));
externals.add(KeyValue.newInstance("Format", getListOfValues(formatSet)));
externals.add(KeyValue.newInstance("Resource Type", getListOfValues(itSet)));
List<Tag> tags = new ArrayList<>();
List<String> kws = new ArrayList<>();
tagsSet.forEach(tag -> {
if (tag.endsWith(",") || tag.endsWith(";") || tag.endsWith(".")) {
tag = tag.substring(0, tag.length() - 1);
}
if (tag.matches("^[a-zA-Z0-9_. -]*$") && tag.length() > 1 && tag.length() < 101) {
tags.add(Tag.newInstance(tag));
} else {
kws.add(tag);
}
});
out.setTags(tags);
if (kws.size() > 0) {
kws.forEach(k -> externals.add(KeyValue.newInstance("keyword", k)));
}
out.setExtras(externals);
}
@ -185,31 +330,56 @@ public class Mapper implements Serializable {
return out;
}
private static eu.dnetlib.dhp.schema.dump.oaf.Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
eu.dnetlib.dhp.schema.dump.oaf.Author a = new eu.dnetlib.dhp.schema.dump.oaf.Author();
Optional
.ofNullable(oa.getAffiliation())
.ifPresent(
value -> a
.setAffiliation(
value
.stream()
.map(aff -> aff.getValue())
.collect(Collectors.toList())));
a.setFullname(oa.getFullname());
a.setName(oa.getName());
a.setSurname(oa.getSurname());
a.setRank(oa.getRank());
Optional
.ofNullable(oa.getPid())
.ifPresent(
value -> a
.setPid(
value
.stream()
.map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
.collect(Collectors.toList())));
return a;
/**
 * Normalises a tag so it is more likely to satisfy the catalogue's
 * allowed-character rules: '&amp;' becomes " and ", parentheses and slashes
 * become '_', square brackets are dropped, and the result is trimmed.
 */
public static String tagReplacements(String tag) {
	String normalised = tag.replace("&", " and ");
	// NOTE: replacing blanks with '_' was deliberately left disabled in the original.
	normalised = normalised.replace("(", "_");
	normalised = normalised.replace(")", "_");
	normalised = normalised.replace("/", "_");
	normalised = normalised.replace("\\", "_");
	normalised = normalised.replace("[", "");
	normalised = normalised.replace("]", "");
	return normalised.trim();
}
/**
 * Renders an author as "fullname" or, when an ORCID pid is present,
 * "fullname, &lt;first orcid value&gt;".
 */
private static String getAuthor(Author oaAuthor) {
	String rendered = oaAuthor.getFullname();
	final List<StructuredProperty> pids = oaAuthor.getPid();
	if (pids != null) {
		for (StructuredProperty pid : pids) {
			// only the first ORCID is appended, matching the original behaviour
			if ("orcid".equalsIgnoreCase(pid.getQualifier().getClassid())) {
				rendered += ", " + pid.getValue();
				break;
			}
		}
	}
	return rendered;
}
/**
 * Joins the values of the given set into a single "; "-separated string;
 * returns the empty string for an empty set. Ordering follows the set's
 * iteration order.
 */
private static String getListOfValues(Set<String> cfSet) {
	return String.join("; ", cfSet);
}
/**
 * Uses the first description as the entry's notes (empty string when there is
 * none) and records every remaining description as a "Description" extra.
 */
private static void getDescription(CatalogueEntry out, List<KeyValue> externals, List<Field<String>> value) {
	if (value.isEmpty()) {
		out.setNotes("");
		return;
	}
	out.setNotes(value.get(0).getValue());
	for (int i = 1; i < value.size(); i++) {
		externals.add(KeyValue.newInstance("Description", value.get(i).getValue()));
	}
}
}

View File

@ -0,0 +1,81 @@
package eu.dnetlib.dhp.oa.graph.dump.gcat;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.http.HttpStatus;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.Serializable;
public class SendToCatalogue implements Serializable {

	private static final Log log = LogFactory.getLog(SendToCatalogue.class);

	/**
	 * Purges the remote gCat catalogue and re-publishes every dumped entry found
	 * under the given HDFS path (one JSON record per line per part file).
	 *
	 * Arguments: --accessToken (gCat token), --hdfsPath (directory with the dumped
	 * records), --hdfsNameNode (fs.defaultFS of the cluster).
	 */
	public static void main(final String[] args) throws Exception {
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
			IOUtils
				.toString(
					SendToCatalogue.class
						.getResourceAsStream(
							// NOTE(review): the parameter file lives under /blacklist —
							// looks copy-pasted; confirm this is the intended resource path.
							"/eu/dnetlib/dhp/blacklist/catalogue_parameters.json")));

		parser.parseArgument(args);

		final String access_token = parser.get("accessToken");
		final String hdfsPath = parser.get("hdfsPath");
		final String hdfsNameNode = parser.get("hdfsNameNode");

		final Configuration conf = new Configuration();
		conf.set("fs.defaultFS", hdfsNameNode);

		final FileSystem fileSystem = FileSystem.get(conf);
		final RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
			.listFiles(
				new Path(hdfsPath), true);

		final GCatAPIClient gCatAPIClient = new GCatAPIClient();
		gCatAPIClient.setApplicationToken(access_token);

		// start from an empty catalogue so the loop below is a full refresh
		final int purged = gCatAPIClient.purgeAll();
		log.info("purged: " + purged);

		while (fileStatusListIterator.hasNext()) {
			final LocatedFileStatus fileStatus = fileStatusListIterator.next();
			final Path p = fileStatus.getPath();
			// derive a local name from the parent directory of the part file
			final String p_string = p.toString();
			final String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
			final String name = tmp.substring(tmp.lastIndexOf("/") + 1);

			log.info("Copying information for : " + name);
			fileSystem.copyToLocalFile(p, new Path("/tmp/" + name));

			// try-with-resources: the original leaked the reader when publish() threw
			try (BufferedReader reader = new BufferedReader(new FileReader("/tmp/" + name))) {
				String line;
				while ((line = reader.readLine()) != null) {
					if (HttpStatus.SC_CREATED != gCatAPIClient.publish(line)) {
						log.error("entry not created for item " + line);
					}
				}
			}

			log.info("deleting information for: " + name);
			final File f = new File("/tmp/" + name);
			if (!f.delete()) {
				// surface the failure instead of silently ignoring the returned flag
				log.warn("could not delete temporary file " + f.getAbsolutePath());
			}
		}
	}
}

View File

@ -86,6 +86,7 @@ public class SparkDumpRISISCatalogue implements Serializable {
(MapFunction<I, CatalogueEntry>) value -> execMap(value, communityName),
Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class))
.filter(Objects::nonNull)
.repartition(1)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")

View File

@ -122,7 +122,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
@ -148,7 +148,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
@ -174,7 +174,7 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
@ -200,16 +200,25 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
<arg>--communityName</arg><arg>${communityName}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="End"/>
<join name="join_dump" to="populate_catalogue"/>
<action name="populate_catalogue">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.gcat.SendToCatalogue</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/blacklist</arg>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>

View File

@ -25,9 +25,9 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.SparkDumpCommunityProducts;
import eu.dnetlib.dhp.oa.graph.dump.gcat.Mapper;
import eu.dnetlib.dhp.oa.graph.dump.gcat.SparkDumpRISISCatalogue;
//@ExtendWith(MockitoExtension.class)
public class DumpJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -69,22 +69,20 @@ public class DumpJobTest {
}
@Test
public void testDataset() throws Exception {
public void testSoftware() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/software.json")
.getPath();
SparkDumpRISISCatalogue.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/result",
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-communityName", "risis"
});
// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> tmp = sc
@ -94,149 +92,13 @@ public class DumpJobTest {
org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));
Assertions.assertEquals(90, verificationDataset.count());
// verificationDataset.show(false);
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset
.filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'")
.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset
.filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'")
.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset
.filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'")
.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset
.filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'")
.count());
Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90);
Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
Assertions.assertEquals(3, verificationDataset.count());
verificationDataset.show(false);
// verificationDataset.select("instance.type").show(false);
//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
}
// @Test
// public void testPublication() throws Exception {
//
// final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
// .getPath();
//
// SparkDumpCommunityProducts.main(new String[] {
// "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
// "-isSparkSessionManaged", Boolean.FALSE.toString(),
// "-outputPath", workingDir.toString() + "/result",
// "-sourcePath", sourcePath,
// "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
// "-communityMap", new Gson().toJson(map)
// });
//
//// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
// .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// Assertions.assertEquals(76, verificationDataset.count());
// verificationDataset.show(false);
//
// Assertions.assertEquals(76, verificationDataset.filter("type = 'publication'").count());
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// }
//
// @Test
// public void testSoftware() throws Exception {
//
// final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
// .getPath();
//
// SparkDumpCommunityProducts.main(new String[] {
// "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
// "-isSparkSessionManaged", Boolean.FALSE.toString(),
// "-outputPath", workingDir.toString() + "/result",
// "-sourcePath", sourcePath,
// "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
// "-communityMap", new Gson().toJson(map)
// });
//
//// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
// .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// Assertions.assertEquals(6, verificationDataset.count());
//
// Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
// verificationDataset.show(false);
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// }
//
// @Test
// public void testORP() throws Exception {
//
// final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
// .getPath();
//
// SparkDumpCommunityProducts.main(new String[] {
// "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
// "-isSparkSessionManaged", Boolean.FALSE.toString(),
// "-outputPath", workingDir.toString() + "/result",
// "-sourcePath", sourcePath,
// "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
// "-communityMap", new Gson().toJson(map)
// });
//
//// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
// .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// Assertions.assertEquals(3, verificationDataset.count());
//
// Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
// verificationDataset.show(false);
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// }
}

View File

@ -1,6 +1,9 @@
package eu.dnetlib.dhp.oa.graph.gcat;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.URISyntaxException;
@ -17,7 +20,7 @@ import eu.dnetlib.dhp.oa.graph.dump.gcat.GCatAPIClient;
* NEVER EVER ENABLE THIS CLASS UNLESS YOU ABSOLUTELY KNOW WHAT YOU ARE DOING: with the proper parameters set it can
drop a D4Science Catalogue
*/
@Disabled
//@Disabled
public class GCatAPIClientTest {
private static GCatAPIClient client;
@ -25,8 +28,8 @@ public class GCatAPIClientTest {
@BeforeAll
public static void setup() {
	// SECURITY: never commit a live gCat token or target URL — the original
	// hard-coded both into the repository. Supply them at run time instead,
	// e.g. -Dgcat.token=... -Dgcat.baseurl=... (or GCAT_TOKEN / GCAT_BASEURL).
	client = new GCatAPIClient();
	client
		.setApplicationToken(
			System.getProperty("gcat.token", System.getenv().getOrDefault("GCAT_TOKEN", "")));
	client
		.setGcatBaseURL(
			System.getProperty("gcat.baseurl", System.getenv().getOrDefault("GCAT_BASEURL", "")));
}
@Test
@ -39,9 +42,9 @@ public class GCatAPIClientTest {
// The 'name' must be between 2 and 100 characters long and contain only lowercase alphanumeric characters, '-'
// and '_'.
// You can validate your name using the regular expression : ^[a-z0-9_\\-]{2,100}$
String objidentifier = "nstest::test";
String objidentifier = "fake";
String json = IOUtils
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_pub.json"));
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_dat.json"));
System.out.println("Creating item...");
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
System.out.println("item created, now listing...");
@ -51,6 +54,64 @@ public class GCatAPIClientTest {
System.out.println("item purged");
}
@Test
public void testPublish() throws IOException, URISyntaxException {
// The 'name' must be between 2 and 100 characters long and contain only lowercase alphanumeric characters, '-'
// and '_'.
// You can validate your name using the regular expression : ^[a-z0-9_\\-]{2,100}$
String json = IOUtils
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_test.json"));
System.out.println("Creating item...");
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
System.out.println("item created, now listing...");
// Assertions.assertEquals(1, client.list(0, 10).size());
}
@Test
public void bulkPublishORP() throws IOException, URISyntaxException {
BufferedReader reader = new BufferedReader(new FileReader(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_other.json")
.getPath()));
String line;
while ((line = reader.readLine()) != null) {
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
}
}
@Test
public void bulkPublishDATS() throws IOException, URISyntaxException {
BufferedReader reader = new BufferedReader(new FileReader(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_dat.json")
.getPath()));
String line;
while ((line = reader.readLine()) != null) {
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
}
}
@Test
public void bulkPublishPUBS() throws IOException, URISyntaxException {
BufferedReader reader = new BufferedReader(new FileReader(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_publications")
.getPath()));
String line;
while ((line = reader.readLine()) != null) {
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(line));
}
}
@Test
public void purgeItem() throws IOException, URISyntaxException {
String objidentifier = "fake";
Assertions.assertTrue(client.purge(objidentifier));
System.out.println("item purged");
}
@Test
public void testPurgeUnexisting() throws IOException, URISyntaxException {
String id = "1234";
@ -59,7 +120,7 @@ public class GCatAPIClientTest {
@Test
public void testPurgeAllEmptyCat() throws IOException, URISyntaxException {
Assertions.assertEquals(0, client.purgeAll());
Assertions.assertEquals(179, client.purgeAll());
}
@Test

View File

@ -0,0 +1,2 @@
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Laredo, Philippe, 0000-0002-5014-9132"},{"key":"AccessMode:Access Right","value":"Open Access"},{"key":"Risis2_Attribution:Contributor","value":"European Commission"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"English"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.2560116"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.2560117"},{"key":"Risis2_Publishing:Publication Date","value":"2019-02-08"},{"key":"Risis2_Publishing:Publisher","value":"Zenodo"},{"key":"Risis2_Publishing:Collected From","value":"ZENODO; Datacite; figshare"},{"key":"Risis2_Publishing:Hosted By","value":"Zenodo; ZENODO; figshare"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.2560117"},{"key":"Identity:URL","value":"https://zenodo.org/record/2560117"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.2560116"},{"key":"Identity:URL","value":"https://figshare.com/articles/Introduction_of_RISIS_project_by_Philippe_Laredo/7699286"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Audiovisual"}],"groups":[{"name":"open"},{"name":"zenodo"},{"name":"figshare"}],"license_id":"notspecified","name":"dedup_wf_001--a10160b3eafcedeb0a384fc400fe1c3fa","notes":"<p>Introduction of RISIS project by Philippe Laredo</p>","tags":[],"title":"Introduction of RISIS project by Philippe Laredo","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::10160b3eafcedeb0a384fc400fe1c3fa","version":"None"}
{"extras":[{"key":"system:type","value":"dataset"},{"key":"Risis2_Attribution:Author","value":"Lepori, Benedetto"},{"key":"Risis2_Attribution:Author","value":"Guerini, Massimilano"},{"key":"AccessMode:Access Right","value":"Open Access"},{"key":"Risis2_Attribution:Contributor","value":"European Commission"},{"key":"AccessMode:Embargo End Date","value":""},{"key":"Language","value":"English"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.3752861"},{"key":"Identity:PID","value":"https://www.doi.org/10.5281/zenodo.3752860"},{"key":"Risis2_Publishing:Publication Date","value":"2020-04-15"},{"key":"Risis2_Publishing:Publisher","value":"Zenodo"},{"key":"Risis2_Publishing:Collected From","value":"Zenodo; ZENODO; Datacite"},{"key":"Risis2_Publishing:Hosted By","value":"Zenodo; ZENODO"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.3752861"},{"key":"Identity:URL","value":"https://zenodo.org/record/3752861"},{"key":"Identity:URL","value":"http://dx.doi.org/10.5281/zenodo.3752860"},{"key":"Country","value":""},{"key":"Format","value":""},{"key":"Resource Type","value":"Dataset"}],"groups":[{"name":"open"},{"name":"zenodo"}],"license_id":"notspecified","name":"dedup_wf_001--ac4634a42d4b98e594e0796a41b47ec61","notes":"<p>This file provides the correspondence table between EUROSTAT NUTS3 classification and the adapted regional classification used by the RISIS-KNOWMAK project. This regional classification fits the structure of knowledge production in Europe and addresses some knowm problems of the NUTS3 classification, such as the treatment of large agglomerations, while remaining fully compatible with the EUROSTAT NUTS regional classification. 
This compatibility allows combining all KNOWMAK data with regional statistics (at NUTS3 level, 2016 edition) from EUROSTAT.</p>\n\n<p>More precisely, the classification includes EUROSTAT metropolitan regions (based on the aggregation of NUTS3-level regions) and NUTS2 regions for the remaining areas; further, a few additional centers for knowledge production, like Oxford and Leuven, have been singled out at NUTS3 level. The resulting classification is therefore more fine-grained than NUTS2 in the areas with sizeable knowledge production, but at the same time recognizes the central role of metropolitan areas in knowledge production. While remaining compatible with NUTS, the classification allows addressing two well-known shortcomings: a) the fact that some large cities are split between NUTS regions (London) and b) the fact that NUTS3 classification in some countries includes many very small regions, as in the case of Germany</p>","tags":[],"title":"RISIS-KNOWMAK NUTS adapted classification","url":"https://beta.risis.openaire.eu/search/dataset?datasetId=dedup_wf_001::c4634a42d4b98e594e0796a41b47ec61","version":""}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,170 @@
{
"extras": [
{
"key": "Journal",
"value": "International Journal of Technology Management, 80, null"
},
{
"key": "system:type",
"value": "publication"
},
{
"key": "Author",
"value": "Laurens, Patricia"
},
{
"key": "Author",
"value": "Le Bas, Christian"
},
{
"key": "Author",
"value": "Schoen, Antoine"
},
{
"key": "AccessMode:Access Right",
"value": "Open Access"
},
{
"key": "Contributor",
"value": "Laboratoire Interdisciplinaire Sciences, Innovations, Sociétés (LISIS) ; Institut National de la Recherche Agronomique (INRA)-Université Paris-Est Marne-la-Vallée (UPEM)-ESIEE Paris-Centre National de la Recherche Scientifique (CNRS)"
},
{
"key": "Contributor",
"value": "ESDES - École de management de Lyon ; Université Catholique de Lyon"
},
{
"key": "Contributor",
"value": "This work was supported by RISIS-funded by the European Union\u2019s Horizon2020 Research and innovation programme under grant number 313082 and 824091"
},
{
"key": "Contributor",
"value": "European Project: 313082,EC:FP7:INFRA,FP7-INFRASTRUCTURES-2012-1,RISIS(2014)"
},
{
"key": "Contributor",
"value": "European Project: 824091,H2020-EU.1.4.1.2,H2020-INFRAIA-2018-1,RISIS2(2019)"
},
{
"key": "Contributor",
"value": "Laboratoire Interdisciplinaire Sciences, Innovations, Société\n (\nLISIS\n)\n\n ; \nInstitut National de la Recherche Agronomique\n (\nINRA\n)\n-Université Paris-Est Marne-la-Vallée\n (\nUPEM\n)\n-ESIEE Paris-Centre National de la Recherche Scientifique\n (\nCNRS\n)"
},
{
"key": "Contributor",
"value": "ESDES - École de management de Lyon\n ; \nUniversité Catholique de Lyon"
},
{
"key": "Contributor",
"value": "Laboratoire Interdisciplinaire Sciences, Innovations, Sociétés (LISIS) ; Centre National de la Recherche Scientifique (CNRS)-ESIEE Paris-Université Paris-Est Marne-la-Vallée (UPEM)-Institut National de la Recherche Agronomique (INRA)"
},
{
"key": "AccessMode:Embargo End Date",
"value": ""
},
{
"key": "Language",
"value": "Undetermined"
},
{
"key": "Identity:PID",
"value": "https://www.doi.org/10.1504/ijtm.2019.100283"
},
{
"key": "Identity:PID",
"value": "https://www.doi.org/10.1504/ijtm.2019.10022013"
},
{
"key": "Publication Date",
"value": "2019-01-01"
},
{
"key": "Publisher",
"value": "Inderscience Publishers"
},
{
"key": "Collected From",
"value": "UnpayWall; INRIA a CCSD electronic archive server; HAL Descartes; HAL - UPEC / UPEM; Crossref; Hyper Article en Ligne; Microsoft Academic Graph; Hyper Article en Ligne - Sciences de l'Homme et de la Société"
},
{
"key": "Hosted By",
"value": "INRIA a CCSD electronic archive server; HAL Descartes; HAL - UPEC / UPEM; Hyper Article en Ligne; Hyper Article en Ligne - Sciences de l'Homme et de la Société; International Journal of Technology Management"
},
{
"key": "Identity:URL",
"value": "https://hal.archives-ouvertes.fr/hal-01725229"
},
{
"key": "Identity:URL",
"value": "https://hal.archives-ouvertes.fr/hal-01725229/document"
},
{
"key": "Identity:URL",
"value": "https://academic.microsoft.com/#/detail/2791245388"
},
{
"key": "Identity:URL",
"value": "http://dx.doi.org/10.1504/ijtm.2019.10022013"
},
{
"key": "Identity:URL",
"value": "http://www.inderscienceonline.com/doi/full/10.1504/IJTM.2019.100283"
},
{
"key": "Identity:URL",
"value": "https://hal.archives-ouvertes.fr/hal-01725229/file/IP%20internationalisation_2017.pdf"
},
{
"key": "Identity:URL",
"value": "http://dx.doi.org/10.1504/ijtm.2019.100283"
},
{
"key": "Identity:URL",
"value": "http://www.inderscienceonline.com/doi/full/10.1504/IJTM.2019.10022013"
},
{
"key": "Country",
"value": "France"
},
{
"key": "Format",
"value": ""
},
{
"key": "Resource Type",
"value": "Article"
},
{
"key": "keyword",
"value": "Manufacturing_L.L6.L65 - Chemicals \u2022 Rubber \u2022 Drugs \u2022 Biotechnology JEL"
},
{
"key": "keyword",
"value": "O - Economic Development, Innovation, Technological Change, and Growth_O.O3 - Innovation \u2022 Research and Development \u2022 Technological Change \u2022 Intellectual Property Rights_O.O3.O34 - Intellectual Property and Intellectual Capital JEL"
}
],
"license_id": "notspecified",
"name": "dedup_wf_001--a48fee33ea4df43e302f6957209893f81",
"notes": "International audience; The paper deals with the determinants of worldwide IP coverage of patented inventions in large pharmaceutical firms. We support the core idea that the internationalisation of firm R&D and an economic presence in a foreign country are positive key factors which explains global IP coverage. For the global pharmaceutical industry, we estimate probit models on the probability that a patent will be expanded worldwide. We retain two categories of worldwide patent: the well-known triadic patent and the new triadic one (triadic + China + Korea). The data set encompasses the 17,633 priority patents applied for by 76 enterprises from several countries over the period 2003-2005. One important finding is that patenting in Japan sets up an important barrier, giving Japanese firms an advantage when triadic patenting is considered. For European and US firms, our estimation results confirm the idea that the level of firm R&D internationalisation is a significant explanatory factor in international IP coverage, together with control variables. We highlight an inverted U-shaped relationship between these two variables. The hypothesis related to a firm economic presence is also verified.",
"tags": [
{"name": "Economics"},
{"name": "Industrial relations"},
{"name": "Law"},
{"name": "business"},
{"name": "F - International Economics_F.F2 - International Factor Movements and International Business_F.F2.F22 - International Migration JEL"},
{"name": "Strategy and Management"},
{"name": "Probit model"},
{"name": "SHS.GESTION Humanities and Social Sciences_Business administration"},
{"name": "General Engineering"},
{"name": "Marketing"},
{"name": "Control variable"},
{"name": "Industrial organization"},
{"name": "Computer Science Applications"},
{"name": "China"},
{"name": "Internationalization"},
{"name": "business.industry"},
{"name": "Pharmaceutical industry"},
{"name": "Foreign country"},
{"name": "Firm strategy"}
],
"title": "Worldwide IP coverage of patented inventions in large pharma firms: to what extent do the internationalisation of R&D and firm strategy matter",
"url": "https://beta.risis.openaire.eu/search/publication?articleId=dedup_wf_001::48fee33ea4df43e302f6957209893f81"
}

File diff suppressed because one or more lines are too long