diff --git a/data/beginners_kit.ipynb b/data/beginners_kit.ipynb new file mode 100644 index 0000000..06fcb13 --- /dev/null +++ b/data/beginners_kit.ipynb @@ -0,0 +1,3543 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# OpenAIRE Beginner’s Kit\n", + "\n", + "The OpenAIRE Research Graph is an Open Access dataset containing metadata about research products (literature, datasets, software, etc.) linked to other entities of the research ecosystem like organisations, project grants, and data sources.\n", + "\n", + "The large size of the OpenAIRE Research Graph is a major impediment for beginners who want to familiarise themselves with the underlying data model and explore its contents. Working with the Graph in its full size typically requires access to a huge distributed computing infrastructure, which is not easily accessible to everyone.\n", + "\n", + "The OpenAIRE Beginner’s Kit aims to address this issue. It consists of two components: (1) a subset of the Graph composed of the research products published between 2022-06-29 and 2022-12-29, together with all the entities connected to them and the respective relationships; and (2) the present Jupyter notebook, which demonstrates how you can use PySpark to analyse the Graph and get answers to some interesting research questions.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [ + "zenodo_base_url" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading communities_infrastructures.tar\n", + "Extracting communities_infrastructures.tar\n", + "Downloading dataset.tar\n", + "Extracting dataset.tar\n", + "Downloading datasource.tar\n", + "Extracting datasource.tar\n", + "Downloading organization.tar\n", + "Extracting organization.tar\n", + "Downloading otherresearchproduct.tar\n", + "Extracting 
otherresearchproduct.tar\n", + "Downloading project.tar\n", + "Extracting project.tar\n", + "Downloading publication.tar\n", + "Extracting publication.tar\n", + "Downloading relation.tar\n", + "Extracting relation.tar\n", + "Downloading software.tar\n", + "Extracting software.tar\n" + ] + } + ], + "source": [ + "!rm -rf data\n", + "!mkdir data\n", + "\n", + "import os\n", + "base_url = \"https://zenodo.org/record/7490192/files/\"\n", + "\n", + "\n", + "items =[\"communities_infrastructures.tar\",\"dataset.tar\",\"datasource.tar\",\"organization.tar\",\"otherresearchproduct.tar\",\"project.tar\",\"publication.tar\",\"relation.tar\", \"software.tar\"]\n", + "\n", + "for item in items: \n", + " print(f\"Downloading {item}\")\n", + " os.system(f'wget {base_url}{item}?download=1 -O data/{item}')\n", + " print(f\"Extracting {item}\")\n", + " os.system(f'tar -xf data/{item} -C data/; rm data/{item}')\n", + " \n", + " \n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Have a look at the input data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "import pyspark.sql.functions as F\n", + "from pyspark.sql.functions import col\n", + "from pyspark.sql.types import StructType\n", + "from pyspark.sql import SparkSession\n", + "from IPython.display import JSON as pretty_print\n", + "\n", + "\n", + "spark = SparkSession.builder.getOrCreate()\n", + "\n", + "\n", + "\n", + "publicationSchema = 
'{\"fields\":[{\"metadata\":{},\"name\":\"author\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"fullname\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"name\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"rank\",\"nullable\":true,\"type\":\"long\"},{\"metadata\":{},\"name\":\"surname\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"bestaccessright\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"container\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"conferencedate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"conferenceplace\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"edition\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"ep\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"iss\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"issnLinking\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"issnOnlin
e\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"issnPrinted\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"name\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"sp\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"vol\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"contributor\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"country\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"coverage\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"dateofcollection\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"description\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"embargoenddate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"format\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"indicators\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"impactMeasures\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"impulse\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"
class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"influence\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"influence_alt\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"popularity\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"popularity_alt\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"usageCounts\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"downloads\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"views\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"instance\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"accessright\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"openAccessRoute\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct
\"}},{\"metadata\":{},\"name\":\"alternateIdentifier\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"license\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"publicationdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"refereed\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"url\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"language\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"lastupdatetimestamp\",\"nullable\":true,\"type\":\"long\"},{\"metadata\":{},\"name\":\"maintitle\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"originalId\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\"
:{},\"name\":\"publicationdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"publisher\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"source\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"subjects\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"subject\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"subtitle\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}'\n", + "datasetSchema = 
'{\"fields\":[{\"metadata\":{},\"name\":\"author\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"fullname\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"name\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"rank\",\"nullable\":true,\"type\":\"long\"},{\"metadata\":{},\"name\":\"surname\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"bestaccessright\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"contributor\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"country\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"nam
e\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"coverage\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"dateofcollection\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"description\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"embargoenddate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"format\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"geolocation\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"box\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"place\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"point\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"indicators\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"impactMeasures\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"impulse\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"influence\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"influence_alt\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata
\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"popularity\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"popularity_alt\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"usageCounts\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"downloads\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"views\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"instance\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"accessright\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"openAccessRoute\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"alternateIdentifier\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"license\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":
true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"publicationdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"refereed\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"url\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"language\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"lastupdatetimestamp\",\"nullable\":true,\"type\":\"long\"},{\"metadata\":{},\"name\":\"maintitle\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"originalId\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"publicationdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"publisher\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"size\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"source\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"subjects\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metada
ta\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"subject\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"subtitle\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"version\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}'\n", + "softwareSchema = '{\"fields\":[{\"metadata\":{},\"name\":\"author\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"fullname\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"name\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"rank\",\"nullable\":true,\"type\":\"long\"},{\"metadata\":{},\"name\":\"surname\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"bestaccessright\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name
\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"contributor\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"country\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"coverage\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"dateofcollection\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"description\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"documentationUrl\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"embargoenddate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"format\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"indicators\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"impactMeasures\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"impulse\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name
\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"influence\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"influence_alt\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"popularity\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"popularity_alt\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"usageCounts\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"downloads\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"views\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"instance\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"accessright\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"openAccessRoute\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"alternateIdentifier\",\"nullable\":tr
ue,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"license\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"publicationdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"refereed\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"url\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"language\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"lastupdatetimestamp\",\"nullable\":true,\"type\":\"long\"},{\"metadata\":{},\"name\":\"maintitle\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"originalId\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"programmingLanguage\",\"nullable\":true,\"type\":\"stri
ng\"},{\"metadata\":{},\"name\":\"publicationdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"publisher\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"source\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"subjects\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"subject\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"subtitle\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}'\n", + "otherSchema = 
'{\"fields\":[{\"metadata\":{},\"name\":\"author\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"fullname\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"name\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"rank\",\"nullable\":true,\"type\":\"long\"},{\"metadata\":{},\"name\":\"surname\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"bestaccessright\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"contactgroup\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"contactperson\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"contributor\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"country\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":t
rue,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"coverage\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"dateofcollection\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"description\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"embargoenddate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"format\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"indicators\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"impactMeasures\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"impulse\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"influence\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"influence_alt\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},
\"name\":\"popularity\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"popularity_alt\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"class\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"score\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"usageCounts\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"downloads\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"views\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"instance\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"accessright\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"openAccessRoute\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"alternateIdentifier\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"license\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"
type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"publicationdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"refereed\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"url\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"language\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"lastupdatetimestamp\",\"nullable\":true,\"type\":\"long\"},{\"metadata\":{},\"name\":\"maintitle\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"originalId\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"publicationdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"publisher\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"source\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"subjects\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metada
ta\":{},\"name\":\"subject\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"subtitle\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"tool\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}'\n", + "datasourceSchema = '{\"fields\":[{\"metadata\":{},\"name\":\"accessrights\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"certificates\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"citationguidelineurl\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"databaseaccessrestriction\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"datasourcetype\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"datauploadrestriction\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"dateofvalidation\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"description\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"englishname\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"journal\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"issnLinking\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"issnOnline\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"issnPrinted\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\
"name\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"languages\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"logourl\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"missionstatementurl\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"officialname\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"openairecompatibility\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"originalId\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"pidsystems\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"policies\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"releasestartdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"subjects\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"uploadrights\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"versioning\",\"nullable\":true,\"type\":\"boolean\"},{\"metadata\":{},\"name\":\"websiteurl\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}'\n", + "organizationSchema = 
'{\"fields\":[{\"metadata\":{},\"name\":\"alternativenames\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"country\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"label\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"legalname\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"legalshortname\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"pid\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"scheme\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"value\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"websiteurl\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}'\n", + "projectSchema = 
'{\"fields\":[{\"metadata\":{},\"name\":\"acronym\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"callidentifier\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"enddate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"funding\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"funding_stream\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"description\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"jurisdiction\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"name\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"shortName\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"granted\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"currency\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"fundedamount\",\"nullable\":true,\"type\":\"double\"},{\"metadata\":{},\"name\":\"totalcost\",\"nullable\":true,\"type\":\"double\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"h2020programme\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":{\"fields\":[{\"metadata\":{},\"name\":\"code\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"description\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"},\"type\":\"array\"}},{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"keywords\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"openaccessmandatefordataset\",\"nullable\":true,\"type\":\"boolean\"},{\"metadata\":{},\"name\":\"openaccessmandateforpublications\",\"nullable\":true,\"type\":\"boolean\"},{\"me
tadata\":{},\"name\":\"startdate\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"subject\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"summary\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"title\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"websiteurl\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}'\n", + "communitySchema = '{\"fields\":[{\"metadata\":{},\"name\":\"acronym\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"description\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"name\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"subject\",\"nullable\":true,\"type\":{\"containsNull\":true,\"elementType\":\"string\",\"type\":\"array\"}},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"zenodo_community\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}'\n", + "relationSchema = 
'{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"provenance\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"trust\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"reltype\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"name\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"source\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"target\",\"nullable\":true,\"type\":{\"fields\":[{\"metadata\":{},\"name\":\"id\",\"nullable\":true,\"type\":\"string\"},{\"metadata\":{},\"name\":\"type\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}},{\"metadata\":{},\"name\":\"validated\",\"nullable\":true,\"type\":\"boolean\"},{\"metadata\":{},\"name\":\"validationDate\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}'" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "number of publications 2685793\n", + "number of datasets 128092\n", + "number of software 26992\n", + "number of other research products 22779\n", + "number of results 2863656\n", + "number of datasources 47356\n", + "number of organizations 7411\n", + "number of communities 17\n", + "number of projects 15780\n", + "number of relationships 14004807\n" + ] + } + ], + "source": [ + "\n", + "\n", + "\n", + "\n", + "#set the input path: the path on the cluster where the dataset will be stored (e.g. 
'/data/openaire_dump_subset/'); untar each folder in the dataset and move it to the chosen path\n", + "\n", + "inputPath = 'data/'\n", + " \n", + "# load entities and relationships\n", + "publication = spark.read.schema(StructType.fromJson(json.loads(publicationSchema))).json(inputPath + 'publication')\n", + "dataset = spark.read.schema(StructType.fromJson(json.loads(datasetSchema))).json(inputPath + 'dataset')\n", + "software = spark.read.schema(StructType.fromJson(json.loads(softwareSchema))).json(inputPath + 'software')\n", + "other = spark.read.schema(StructType.fromJson(json.loads(otherSchema))).json(inputPath + 'otherresearchproduct')\n", + "#results = publication.dropColumn('container').unionByName(dataset.dropColumns('size', 'version', 'geolocation'), allowMissingColumns=True).unionByName(software.dropColumns('documentationUrl', 'codeRepositoryUrl', 'programmingLanguage'), allowMissingColumns=True).unionByName(other.dropColumns('contactperson', 'contactgroup', 'tool'), allowMissingColumns=True)\n", + "results = publication.unionByName(dataset, allowMissingColumns=True).unionByName(software, allowMissingColumns=True).unionByName(other, allowMissingColumns=True)\n", + "datasource = spark.read.schema(StructType.fromJson(json.loads(datasourceSchema))).json(inputPath + 'datasource')\n", + "organization = spark.read.schema(StructType.fromJson(json.loads(organizationSchema))).json(inputPath + 'organization')\n", + "project = spark.read.schema(StructType.fromJson(json.loads(projectSchema))).json(inputPath + 'project')\n", + "community = spark.read.schema(StructType.fromJson(json.loads(communitySchema))).json(inputPath + 'communities_infrastructures')\n", + "relation = spark.read.schema(StructType.fromJson(json.loads(relationSchema))).json(inputPath + 'relation')\n", + "\n", + "publication.createOrReplaceTempView(\"publications\")\n", + "dataset.createOrReplaceTempView(\"datasets\")\n", + "software.createOrReplaceTempView(\"software\")\n", + 
"other.createOrReplaceTempView(\"others\")\n", + "results.createOrReplaceTempView(\"results\")\n", + "datasource.createOrReplaceTempView(\"datasources\")\n", + "organization.createOrReplaceTempView(\"organizations\")\n", + "project.createOrReplaceTempView(\"projects\")\n", + "community.createOrReplaceTempView(\"communities\")\n", + "relation.createOrReplaceTempView(\"relations\")\n", + "\n", + "# count and print their number\n", + "print(\"number of publications %s\"%publication.count())\n", + "print(\"number of datasets %s\"%dataset.count())\n", + "print(\"number of software %s\"%software.count())\n", + "print(\"number of other research products %s\"%other.count())\n", + "print(\"number of results %s\"%results.count())\n", + "print(\"number of datasources %s\"%datasource.count())\n", + "print(\"number of organizations %s\"%organization.count())\n", + "print(\"number of communities %s\"%community.count())\n", + "print(\"number of projects %s\"%project.count())\n", + "print(\"number of relationships %s\"%relation.count())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "application/json": { + "author": [ + { + "fullname": "Son, D.A.", + "name": "D. A.", + "rank": 1, + "surname": "Son" + }, + { + "fullname": "Anh, N.T.N", + "name": "N. T. N.", + "rank": 2, + "surname": "Anh" + }, + { + "fullname": "Tung, P.A.", + "name": "P. 
A.", + "rank": 3, + "surname": "Tung" + } + ], + "bestaccessright": { + "code": "c_abf2", + "label": "OPEN", + "scheme": "http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "container": { + "edition": "", + "ep": "", + "iss": "", + "issnLinking": "", + "issnOnline": "0012-835X", + "issnPrinted": "0012-835X", + "name": "East African Medical Journal", + "sp": "", + "vol": "" + }, + "contributor": [], + "country": [], + "coverage": [], + "dateofcollection": "2022-09-04T02:25:47.163Z", + "description": [ + "Objectives: To understand the physical activity and cognitive impairment among elderly people. Materials and methods: The study was conducted on 1210 elderly people (aged 60 and older) in 3 district in Ha Nam province: Binh Luc, Duy Tien and Kim Bang, from January 2020 to June 2020. Results: The prevalence of elderly people with symptoms of cognitive impairment was 46.36%. There was a significant difference in the rate of cognitive impairment between 2 groups of continuous exercise (41.92%) and the group Non-exercise (51.62%) (p<0.05). Compared to the Non-exercise group, the risk of cognitive impairment of group that exercise 15 - 45 minutes/week (OR = 0.69; 95% CI: 0.51 - 0.93; p = 0.014); group that exercise 45 - 90 minute/week group (OR = 0.61; 95% CI: 0.44 - 0.84; p = 0.003) and group that exercise over 90 minutes/week (OR = 0.43; 95% CI: 0.27 - 0.67; p = 0.000) were all significantly lower. Conclusion: The more time the participants spent exercise, the lower the risk of developing cognitive impairment they get. It is necessary to expand research, continue to apply the international standard cognitive screening test, also guide and encourage the older adults to carry out physical activities according to their capabilities and international recommendations." 
+ ], + "format": [ + "application/pdf" + ], + "id": "50|78975075580c::2ff84f3173897001283274434e8f3eaa", + "instance": [ + { + "accessright": { + "code": "c_abf2", + "label": "OPEN", + "scheme": "http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "alternateIdentifier": [], + "pid": [], + "publicationdate": "2022-08-29", + "refereed": "peerReviewed", + "type": "Article", + "url": [ + "https://www.ajol.info/index.php/eamj/article/view/230697" + ] + } + ], + "language": { + "code": "eng", + "label": "English" + }, + "lastupdatetimestamp": 1671492313610, + "maintitle": "Physical activity habits and cognitive impairment in the elderly in some districts of Ha Nam province, Vietnam in 2020", + "originalId": [ + "oai:ajol.info:article/230697" + ], + "pid": [], + "publicationdate": "2022-08-29", + "publisher": "Kenya Medical Association", + "source": [ + "East African Medical Journal; Vol. 99 No. 7 (2022); 5014-5020", + "0012-835X" + ], + "subjects": [], + "type": "publication" + }, + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": { + "application/json": { + "expanded": true, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "# the generic result (link to documentation: https://graph.openaire.eu/docs/data-model/entities/result)\n", + "pretty_print(json.loads(publication.where(\"id='50|78975075580c::2ff84f3173897001283274434e8f3eaa'\").toJSON().first()), expanded=True)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "application/json": { + "datasourcetype": { + "scheme": "datarepository::unknown", + "value": "Data Repository" + }, + "description": "Scholars Portal Dataverse is a repository of research data in all fields of research. Researchers can share, publish, archive, find and cite data across all research fields. 
Researchers from subscribing institutions can use Dataverse to directly deposit data, create metadata, release and share data openly or privately, visualize and explore data, and search for data.", + "id": "10|fairsharing_::c3a690be93aa602ee2dc0ccab5b7b67e", + "languages": [], + "officialname": "Scholars Portal Dataverse", + "openairecompatibility": "Not yet registered", + "originalId": [ + "fairsharing_::2542", + "opendoar____::10329", + "re3data_____::r3d100010691" + ], + "pid": [ + { + "scheme": "doi", + "value": "10.25504/FAIRsharing.kwzydf" + }, + { + "scheme": "re3data", + "value": "r3d100010691" + } + ], + "policies": [], + "releasestartdate": "2012-01-01", + "subjects": [ + "Data Management", + "Subject Agnostic", + "Experimental measurement", + "Protocol", + "Data storage" + ], + "versioning": false, + "websiteurl": "https://dataverse.scholarsportal.info/" + }, + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": { + "application/json": { + "expanded": true, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "# the data source (link to documentation: https://graph.openaire.eu/docs/data-model/entities/data-source)\n", + "pretty_print(json.loads(datasource.where(\"id='10|fairsharing_::c3a690be93aa602ee2dc0ccab5b7b67e'\").toJSON().first()), expanded=True)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "application/json": { + "alternativenames": [ + "Hospital Authority", + "HA" + ], + "country": { + "code": "CN", + "label": "China (People's Republic of)" + }, + "id": "20|openorgs____::5836463160e0e5d1cd12997f7d2f0257", + "legalname": "Hospital Authority", + "legalshortname": "HA", + "pid": [ + { + "scheme": "ISNI", + "value": "0000 0004 1764 4320" + }, + { + "scheme": "FundRef", + "value": "501100003808" + }, + { + "scheme": "FundRef", + "value": "501100006577" + }, + { + "scheme": "GRID", + 
"value": "grid.414370.5" + }, + { + "scheme": "ROR", + "value": "https://ror.org/05sn8t512" + }, + { + "scheme": "Wikidata", + "value": "Q5908350" + }, + { + "scheme": "ISNI", + "value": "0000 0004 1764 4320" + }, + { + "scheme": "FundRef", + "value": "501100003808" + }, + { + "scheme": "FundRef", + "value": "501100006577" + }, + { + "scheme": "GRID", + "value": "grid.414370.5" + }, + { + "scheme": "ROR", + "value": "https://ror.org/05sn8t512" + }, + { + "scheme": "Wikidata", + "value": "Q5908350" + } + ], + "websiteurl": "http://www.ha.org.hk/visitor/ha_index.asp?Content_ID=0&Lang=ENG&Dimension=100&Ver=HTML" + }, + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": { + "application/json": { + "expanded": true, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "# the organization (link to documentation: https://graph.openaire.eu/docs/data-model/entities/organization)\n", + "pretty_print(json.loads(organization.where(\"id='20|openorgs____::5836463160e0e5d1cd12997f7d2f0257'\").toJSON().first()), expanded=True)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "application/json": { + "acronym": "BFC", + "code": "ANR-15-IDEX-0003", + "funding": [ + { + "jurisdiction": "FR", + "name": "French National Research Agency (ANR)", + "shortName": "ANR" + } + ], + "granted": { + "currency": "EUR", + "fundedamount": 49328900, + "totalcost": 0 + }, + "h2020programme": [], + "id": "40|anr_________::3590b45fea74b726d3c3e9872a2dcbf8", + "openaccessmandatefordataset": false, + "openaccessmandateforpublications": false, + "subject": [], + "summary": "Le projet ISITE-BFC réunit 16 partenaires : Université Bourgogne Franche-Comté (UBFC), porteur du projet ISITE-BFC ; Les 7 établissements membres d’UBFC : Université de Bourgogne (uB), Université de Franche-Comté (UFC), Université de Technologie de Belfort-Montbéliard (UTBM) , AgroSup 
Dijon, Ecole Nationale Supérieure de Mécanique et de Microtechniques (ENSMM), Burgundy School of Business (BSB) et l'Ecole Nationale Supérieure des Arts et Métiers (ENSAM en cours d'intégration en tant qu'établissement membre d'UBFC ). Quatre organismes nationaux de recherche actifs en Bourgogne Franche-Comté (BFC): l’INRA, le CNRS, l’INSERM et le CEA. Quatre établissements de santé implantés en BFC : le CHRU Besançon, le CHU Dijon, le Centre Georges François Leclerc (CGFL, Centre de Lutte Contre le Cancer) et l’Etablissement Français du Sang (EFS). ISITE-BFC a pour but de propulser l'université-cible du projet UBFC sur la scène internationale. UBFC représente en BFC une communauté de 58 000 étudiants et 8 800 personnels dont 2 200 enseignants-chercheurs actifs dans 60 laboratoires et fédérations de recherche. La Région BFC a annoncé que, parmi les établissements d'ESR de BFC, UBFC sera son interlocuteur privilégié en matière de soutien à l'innovation. En 2016, les membres du consortium ont obtenus 62 nouveaux projets nationaux et 31 nouveaux projets internationaux correspondant à des budgets cumulés supérieurs à 14 M€ et à 10 M€ respectivement. En fédérant les établissements membres et en unissant ainsi les forces en matière d’enseignement supérieur et de recherche, UBFC intègre dès 2016 le classement international des universités proposé par le Times Higher Education entre la 501ème et 600ème place parmi 980 institutions internationales classées. Les premières élections du CA et du CAC d'UBFC ont eu lieu en avril 2016. Le premier CA élu d'UBFC a ensuite choisi le premier président élu d'UBFC. Toutes les instances de gouvernance d'UBFC et du projet ISITE-BFC sont devenues opérationnelles en 2016. Le Conseil des Membres, constitué de l’ensemble des chefs d’établissements membres d'UBFC, était bien sûr déjà fonctionnel avant 2016 puisqu'il avait travaillé à la rédaction des statuts de la COMUE UBFC créée en 2015. 
Durant 2016, conformément aux statuts, ce Conseil a continué à agir comme comité de pilotage de l'Université-Cible UBFC. En automne 2016, en donnant pour instruction à leurs personnels enseignants-chercheurs respectifs de soumettre les projets ANR et H2020 en y définissant UBFC comme porteur légal, les chefs d'établissements membres d'UBFC ont réalisé un engagement important prévu dans le projet ISITE-BFC pour la trajectoire d'UBFC. Le projet ISITE-BFC occupe une place centrale dans l'articulation avec la politique contractuelle du MENESR. ISITE-BFC est en effet au cœur des perspectives décrites dans le rapport HCERES d'UBFC déposé en 2016. Le dialogue contractuel de site avec l’Etat a également été initié en 2016 et élaboré principalement durant l'automne 2016. Le contrat de site a été signé en avril 2017 et inscrit également l'ISITE-BFC dans ses objectifs stratégiques. Durant 2016, les messages à l'attention de la communauté des enseignants-chercheurs ont été multiplié pour les inciter à appliquer la signature scientifique unique UBFC. L'objectif annoncé dans la convention de préfinancement a été atteint, à savoir 30 % d'application de la signature scientifique unique UBFC sur la période de juillet 2015 (date de démarrage de l'application de la signature unique UBFC) à fin 2016. Sur la période de janvier à décembre 2016, ce taux est de 48 %. Depuis 2016, chaque personnel membre d'un établissement membre d'UBFC dispose d'une adresse de courrier éléctronique du type prenom.nom@ubfc.fr. Le projet ISITE-BFC a été sélectionné en janvier 2016. Il a bénéficié d'un préfinancement de 5 M€ à partir d'août 2016. 
Dès que l'enveloppe du préfinancement a été connue, le Comité de Pilotage d'ISITE-BFC a élaboré un plan d'utilisation des fonds sur un nombre réduit d'outils par rapport à ceux prévus dans le projet amendé (car le budget du préfinancement est environ deux fois inférieur au budget soumis dans le projet amendé) et configuré tous les détails d'un appel à projets internes au consortium, publié le 15 septembre 2016. Une réunion de lancement a eu lieu le 23 septembre 2016 et 95 projets ont été déposés avant la date limite du 4 novembre 2016. Les canevas des dossiers à soumettre imposaient de lister les effectifs potentiellement concernés. Avec une moyenne de 10 personnels concernés par soumission, on peut estimer que près de 1000 membres du personnel ont été impliqués de près ou de loin dans l'élaboration des projets soumis. Les outils ouverts contribuent majoritairement aux trois domaines prioritaires de l’ISITE. Les projets ont été évalués pendant la période novembre-janvier 2016. Les lauréats ont été sélectionnés le 27 janvier 2017. les projets financés représentent un budget total de 3,6 M€. Le détail des projets sélectionnés (y compris les budgets ISITE-BFC) est accessible via http://www.ubfc.fr/resultats/. Le reste du préfinancement a été alloué à des actions d'internationalisation du site, au développement du campus numérique, à la promotion de la marque UBFC et à la gestion du projet. Un effet d’entraînement important de ce premier appel à projets internes au consortium a été constaté dans les dossiers soumis à l'instrument « Masters internationaux dispensés en langue anglaise ». Le plan initial prévu dans le Projet Amendé prévoyait d'ouvrir 9 de ces masters en quatre ans alors que 9 projets de ce type de master ont été soumis dès le premier appel à projets internes, attestant ainsi que la communauté s'investit dans la construction d’une offre de formation attractive à l’échelle du site et sous portage UBFC grâce à ISITE-BFC. 
Cinq projets (correspondant à cinq mentions et six parcours) ont été sélectionnés en janvier 2017, validés par le CNESER et ouvriront en septembre 2017 : Biologie de la Conservation ; Physique fondamentale et applications ; Automatique, robotique ; Intelligence économique ; Sciences et Technologie de l’Agriculture, de l’Alimentation et de l’Environnement. Tous impliquent plusieurs établissements de la COMUE UBFC. Le niveau de réponse plus élevé que prévu invite le COPIL ISITE-BFC a revoir à la hausse les objectifs à quatre ans en ce qui concerne les masters internationaux. D’autre part, UBFC a coordonné et soumis en 2016 un projet de développement de Formation Tout au Long de la Vie (FTLV), « Expérimentation Pilotes FTLV », impliquant tous les établissements membres. Ce projet a été retenu par le MENESR en décembre 2016. Parallèlement, l'IDEFI TalentCampus – projet consacré au développement des compétences sociales - a opéré une spectaculaire montée en puissance : 6000 participants en 2016 sur les 13000 au total depuis 2012. Avec une ligne d'action dédiée aux actions de recherches partenariales avec les industries locales, le projet ISITE-BFC a suscité l'intérêt des clusters d'industries implantés en BFC. UBFC est ainsi apparu comme un interlocuteur de poids aux yeux des industries locales. Douze projets de recherche en partenariat avec des entreprises locales soumis au premier appel à projets internes au consortium. Quatre de ces projets ont été sélectionnés. L'intégration des deux Labex coordonnés en BFC est matérialisée par le fait que les acteurs de ces deux Labex ont configuré ou inspiré plusieurs projets soumis à l'appel d'offres internes au consortium ISITE-BFC. 
La synergie entre les Labex et ISITE-BFC est bien visible au niveau des projets sélectionnés par un comité d'experts internationaux : trois projets en partenariats avec des industries, un Junior Fellowship, un projet blanc et un projet interdisciplinaire sont coordonnés par des acteurs du Labex ACTION ; deux projets interdisciplinaires et un Junior Fellowship sont coordonnés par des acteurs du Labex LIPSTIC.", + "title": "ISITE « BFC" + }, + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": { + "application/json": { + "expanded": true, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "# the project (link to documentation: https://graph.openaire.eu/docs/data-model/entities/project)\n", + "pretty_print(json.loads(project.toJSON().first()), expanded=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "application/json": { + "acronym": "mes", + "description": "This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. 
JPI-Oceans and Tara Foundation).", + "id": "00|context_____::d2db8a610f8c7c0785d2d92a6e8c450e", + "name": "European Marine Science", + "subject": [ + "marine", + "ocean", + "fish", + "aqua", + "sea" + ], + "type": "Research Community", + "zenodo_community": "https://zenodo.org/communities/oac_mes" + }, + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": { + "application/json": { + "expanded": true, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "# the community (link to documentation: https://graph.openaire.eu/docs/data-model/entities/community)\n", + "pretty_print(json.loads(community.where(\"acronym='mes'\").toJSON().first()), expanded=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "application/json": { + "provenance": { + "provenance": "Harvested", + "trust": "0.9" + }, + "reltype": { + "name": "isHostedBy", + "type": "provision" + }, + "source": { + "id": "50|doi_________::536dffbcf19e4d1f48f99bfb0d86d2e1", + "type": "result" + }, + "target": { + "id": "10|doajarticles::00f13fb5bcb74cf81c03e783bff91faf", + "type": "datasource" + }, + "validated": false + }, + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": { + "application/json": { + "expanded": true, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "# the relation (link to documentation: https://graph.openaire.eu/docs/data-model/relationships)\n", + "pretty_print(json.loads(relation.toJSON().first()), expanded=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namecount
0isProvidedBy3534319
1provides3534312
2hosts2696438
3isHostedBy2696436
4IsRelatedTo399737
5isAuthorInstitutionOf231642
6hasAuthorInstitution231642
7IsCitedBy174058
8Cites174058
9HasVersion44402
10IsVersionOf44402
11isProducedBy38672
12produces38671
13IsPartOf34520
14HasPart34520
15hasParticipant31035
16isParticipant31035
17IsIdenticalTo12974
18HasAmongTopNSimilarDocuments5903
19IsAmongTopNSimilarDocuments5903
\n", + "
" + ], + "text/plain": [ + " name count\n", + "0 isProvidedBy 3534319\n", + "1 provides 3534312\n", + "2 hosts 2696438\n", + "3 isHostedBy 2696436\n", + "4 IsRelatedTo 399737\n", + "5 isAuthorInstitutionOf 231642\n", + "6 hasAuthorInstitution 231642\n", + "7 IsCitedBy 174058\n", + "8 Cites 174058\n", + "9 HasVersion 44402\n", + "10 IsVersionOf 44402\n", + "11 isProducedBy 38672\n", + "12 produces 38671\n", + "13 IsPartOf 34520\n", + "14 HasPart 34520\n", + "15 hasParticipant 31035\n", + "16 isParticipant 31035\n", + "17 IsIdenticalTo 12974\n", + "18 HasAmongTopNSimilarDocuments 5903\n", + "19 IsAmongTopNSimilarDocuments 5903" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query =\"\"\"SELECT reltype.name, \n", + " COUNT(*) AS count \n", + "FROM relations \n", + "GROUP BY reltype.name \n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subject termcount
0General Medicine242423
1Electrical and Electronic Engineering66295
2General Materials Science62012
3General Chemistry56444
4Biochemistry52956
5Computer Science Applications52099
6Mechanical Engineering46967
7Condensed Matter Physics46413
8Surgery42772
9General Environmental Science41371
10Public Health, Environmental and Occupational ...40836
11FOS: Computer and information sciences40609
12Oncology40491
13Molecular Biology39883
14General Engineering39537
15FOS: Physical sciences39021
16Social and Behavioral Sciences38058
17Renewable Energy, Sustainability and the Envir...36529
18Education36364
19Materials Chemistry35187
\n", + "
" + ], + "text/plain": [ + " subject term count\n", + "0 General Medicine 242423\n", + "1 Electrical and Electronic Engineering 66295\n", + "2 General Materials Science 62012\n", + "3 General Chemistry 56444\n", + "4 Biochemistry 52956\n", + "5 Computer Science Applications 52099\n", + "6 Mechanical Engineering 46967\n", + "7 Condensed Matter Physics 46413\n", + "8 Surgery 42772\n", + "9 General Environmental Science 41371\n", + "10 Public Health, Environmental and Occupational ... 40836\n", + "11 FOS: Computer and information sciences 40609\n", + "12 Oncology 40491\n", + "13 Molecular Biology 39883\n", + "14 General Engineering 39537\n", + "15 FOS: Physical sciences 39021\n", + "16 Social and Behavioral Sciences 38058\n", + "17 Renewable Energy, Sustainability and the Envir... 36529\n", + "18 Education 36364\n", + "19 Materials Chemistry 35187" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query=\"\"\"WITH terms AS (\n", + " SELECT explode(subjects.subject.value) AS `term` FROM publications\n", + ")\n", + "SELECT term AS `subject term`, \n", + " COUNT(*) AS count \n", + "FROM terms \n", + "GROUP BY term \n", + "ORDER BY count DESC\"\"\"\n", + "\n", + "spark.sql(query).limit(20).toPandas()\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
leftrightcount
0businessbusiness.industry12625
1business.industrymedicine.medical_specialty5327
2businessmedicine.medical_specialty5323
3business.industrymedicine5190
4businessmedicine5187
5medicinemedicine.medical_specialty4396
6business.industrymedicine.disease3997
7businessmedicine.disease3994
8medicinemedicine.disease3754
9Computer sciencebusiness3275
10Computer sciencebusiness.industry3239
11media_commonmedia_common.quotation_subject3234
12medicine.diseasemedicine.medical_specialty3153
13Medicinebusiness2630
14Medicinebusiness.industry2630
15Artificial intelligencebusiness.industry1758
16Artificial intelligencebusiness1754
17Internal medicinemedicine.medical_specialty1715
18Internal medicinebusiness1670
19Internal medicinebusiness.industry1670
\n", + "
" + ], + "text/plain": [ + " left right count\n", + "0 business business.industry 12625\n", + "1 business.industry medicine.medical_specialty 5327\n", + "2 business medicine.medical_specialty 5323\n", + "3 business.industry medicine 5190\n", + "4 business medicine 5187\n", + "5 medicine medicine.medical_specialty 4396\n", + "6 business.industry medicine.disease 3997\n", + "7 business medicine.disease 3994\n", + "8 medicine medicine.disease 3754\n", + "9 Computer science business 3275\n", + "10 Computer science business.industry 3239\n", + "11 media_common media_common.quotation_subject 3234\n", + "12 medicine.disease medicine.medical_specialty 3153\n", + "13 Medicine business 2630\n", + "14 Medicine business.industry 2630\n", + "15 Artificial intelligence business.industry 1758\n", + "16 Artificial intelligence business 1754\n", + "17 Internal medicine medicine.medical_specialty 1715\n", + "18 Internal medicine business 1670\n", + "19 Internal medicine business.industry 1670" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query=\"\"\"\n", + "WITH subjects AS (\n", + " WITH tmp (SELECT id, EXPLODE(subjects.subject) AS subject FROM publications) \n", + " SELECT id, subject.value AS `subject` FROM tmp WHERE subject.scheme != 'keyword'\n", + ")\n", + "SELECT l.subject AS left, \n", + " r.subject AS right, \n", + " COUNT(*) AS count\n", + "FROM subjects AS l JOIN subjects AS r ON l.id = r.id AND l.subject < r.subject\n", + "GROUP BY left, right\n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
conferencedateconferenceplaceeditionepississnLinkingissnOnlineissnPrintednamespvol
0NoneNone0012-835X0012-835XEast African Medical Journal
1NoneNoneNoneNoneNoneNoneNone0032-5910Powder Technology117586406
2NoneNoneNone0NoneNone1110-8460Noneالمجلة العلمیة لعلوم وفنون الریاضة00
3NoneNoneNone1319NoneNoneNone0883-5403The Journal of Arthroplasty131437
4NoneNoneNone837NoneNone1435-81151431-9276Microscopy and Microanalysis83628
5NoneNoneNone42133NoneNone1944-82521944-8244ACS Applied Materials &amp; Interfaces4212314
6NoneNoneNoneNoneNoneNoneNone0272-8842Ceramics InternationalNoneNone
7NoneNoneNone1023NoneNoneNone0020-0255Information Sciences994612
8NoneNoneNoneNoneNoneNone2632-959XNoneNano ExpressNoneNone
9NoneNoneNoneNoneNoneNone1863-46131865-1704International Review of EconomicsNoneNone
10NoneNoneNoneNoneNoneNoneNone2651-4141Ankara Hacı Bayram Veli Üniversitesi Hukuk Fak...NoneNone
11NoneNoneNoneNoneNoneNone2107-01800378-7966European Journal of Drug Metabolism and Pharma...NoneNone
12NoneNoneNoneNoneNoneNone1742-65961742-6588Journal of Physics: Conference Series0120082304
13NoneNoneNone9454NoneNone2574-09622574-0962ACS Applied Energy Materials94475
14NoneNoneNoneNoneNoneNoneNoneNone2022 International Conference on Intelligent C...NoneNone
15NoneNoneNone1321NoneNone2093-63111598-2351International Journal of Steel Structures130622
16NoneNoneNoneNoneNoneNone1475-47620004-0894AreaNoneNone
17NoneNoneNoneNoneNoneNone2326-831X2326-8298Annual Review of Statistics and Its ApplicationNone10
18NoneNoneNoneNoneNoneNoneNoneNoneSpintronics XVNoneNone
19NoneNoneNoneNoneNoneNone2072-6694NoneCancers329114
\n", + "
" + ], + "text/plain": [ + " conferencedate conferenceplace edition ep iss issnLinking issnOnline \n", + "0 None None 0012-835X \\\n", + "1 None None None None None None None \n", + "2 None None None 0 None None 1110-8460 \n", + "3 None None None 1319 None None None \n", + "4 None None None 837 None None 1435-8115 \n", + "5 None None None 42133 None None 1944-8252 \n", + "6 None None None None None None None \n", + "7 None None None 1023 None None None \n", + "8 None None None None None None 2632-959X \n", + "9 None None None None None None 1863-4613 \n", + "10 None None None None None None None \n", + "11 None None None None None None 2107-0180 \n", + "12 None None None None None None 1742-6596 \n", + "13 None None None 9454 None None 2574-0962 \n", + "14 None None None None None None None \n", + "15 None None None 1321 None None 2093-6311 \n", + "16 None None None None None None 1475-4762 \n", + "17 None None None None None None 2326-831X \n", + "18 None None None None None None None \n", + "19 None None None None None None 2072-6694 \n", + "\n", + " issnPrinted name sp \n", + "0 0012-835X East African Medical Journal \\\n", + "1 0032-5910 Powder Technology 117586 \n", + "2 None المجلة العلمیة لعلوم وفنون الریاضة 0 \n", + "3 0883-5403 The Journal of Arthroplasty 1314 \n", + "4 1431-9276 Microscopy and Microanalysis 836 \n", + "5 1944-8244 ACS Applied Materials & Interfaces 42123 \n", + "6 0272-8842 Ceramics International None \n", + "7 0020-0255 Information Sciences 994 \n", + "8 None Nano Express None \n", + "9 1865-1704 International Review of Economics None \n", + "10 2651-4141 Ankara Hacı Bayram Veli Üniversitesi Hukuk Fak... None \n", + "11 0378-7966 European Journal of Drug Metabolism and Pharma... None \n", + "12 1742-6588 Journal of Physics: Conference Series 012008 \n", + "13 2574-0962 ACS Applied Energy Materials 9447 \n", + "14 None 2022 International Conference on Intelligent C... 
None \n", + "15 1598-2351 International Journal of Steel Structures 1306 \n", + "16 0004-0894 Area None \n", + "17 2326-8298 Annual Review of Statistics and Its Application None \n", + "18 None Spintronics XV None \n", + "19 None Cancers 3291 \n", + "\n", + " vol \n", + "0 \n", + "1 406 \n", + "2 0 \n", + "3 37 \n", + "4 28 \n", + "5 14 \n", + "6 None \n", + "7 612 \n", + "8 None \n", + "9 None \n", + "10 None \n", + "11 None \n", + "12 2304 \n", + "13 5 \n", + "14 None \n", + "15 22 \n", + "16 None \n", + "17 10 \n", + "18 None \n", + "19 14 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query=\"\"\"SELECT container.* \n", + "FROM publications \n", + "WHERE container IS NOT NULL\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namecount
0Scientific Reports8152
1SSRN Electronic Journal8061
2Blood6527
3PLOS ONE6206
4Cureus5636
5International Journal of Molecular Sciences4793
6International Journal of Environmental Researc...4466
7Academy of Management Proceedings4391
8Sustainability4334
9ECS Meeting Abstracts4235
10Research, Society and Development4042
11Frontiers in Immunology3750
12Frontiers in Psychology3667
13Science of The Total Environment3630
14International journal of health sciences3592
15Frontiers in Oncology3562
16European Heart Journal3358
17Applied Sciences3111
18IOP Conference Series: Earth and Environmental...3047
19Journal of Cleaner Production3030
\n", + "
" + ], + "text/plain": [ + " name count\n", + "0 Scientific Reports 8152\n", + "1 SSRN Electronic Journal 8061\n", + "2 Blood 6527\n", + "3 PLOS ONE 6206\n", + "4 Cureus 5636\n", + "5 International Journal of Molecular Sciences 4793\n", + "6 International Journal of Environmental Researc... 4466\n", + "7 Academy of Management Proceedings 4391\n", + "8 Sustainability 4334\n", + "9 ECS Meeting Abstracts 4235\n", + "10 Research, Society and Development 4042\n", + "11 Frontiers in Immunology 3750\n", + "12 Frontiers in Psychology 3667\n", + "13 Science of The Total Environment 3630\n", + "14 International journal of health sciences 3592\n", + "15 Frontiers in Oncology 3562\n", + "16 European Heart Journal 3358\n", + "17 Applied Sciences 3111\n", + "18 IOP Conference Series: Earth and Environmental... 3047\n", + "19 Journal of Cleaner Production 3030" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query=\"\"\"WITH journals AS (\n", + " SELECT container.* FROM publications WHERE container IS NOT NULL\n", + ")\n", + "SELECT name, \n", + " count(*) AS count \n", + "FROM journals \n", + "GROUP BY name \n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectcount
0NSERC - unidentified - unidentified5817
1CIHR - unidentified - unidentified2216
2SSHRC - unidentified - unidentified1044
3EC - 822336 - Representation and Preservation ...921
4WT - unidentified - unidentified588
5EC - 773830 - Promoting One Health in Europe t...155
6EC - 786314 - Continuity and Rupture in Centra...60
7EC - 633053 - Implementation of activities des...55
8EC - 881603 - Graphene Flagship Core Project 347
9EC - 945539 - Human Brain Project Specific Gra...46
10EC - 824093 - The strong interaction at the fr...41
11EC - 872522 - Expanding our knowledge on Citiz...40
12EC - 823717 - Enabling Science and Technology ...40
13EC - 900014 - Fracture mechanics testing of ir...38
14EC - 823914 - Advanced Research Infrastructure...37
15EC - 733032 - European Human Biomonitoring Ini...32
16NSF - 1852977 - The Management and Operation o...31
17EC - 776613 - European Climate Prediction system31
18EC - 776816 - Project Ô: demonstration of plan...30
19EC - 812880 - Joint PhD Laboratory for New Mat...30
\n", + "
" + ], + "text/plain": [ + " project count\n", + "0 NSERC - unidentified - unidentified 5817\n", + "1 CIHR - unidentified - unidentified 2216\n", + "2 SSHRC - unidentified - unidentified 1044\n", + "3 EC - 822336 - Representation and Preservation ... 921\n", + "4 WT - unidentified - unidentified 588\n", + "5 EC - 773830 - Promoting One Health in Europe t... 155\n", + "6 EC - 786314 - Continuity and Rupture in Centra... 60\n", + "7 EC - 633053 - Implementation of activities des... 55\n", + "8 EC - 881603 - Graphene Flagship Core Project 3 47\n", + "9 EC - 945539 - Human Brain Project Specific Gra... 46\n", + "10 EC - 824093 - The strong interaction at the fr... 41\n", + "11 EC - 872522 - Expanding our knowledge on Citiz... 40\n", + "12 EC - 823717 - Enabling Science and Technology ... 40\n", + "13 EC - 900014 - Fracture mechanics testing of ir... 38\n", + "14 EC - 823914 - Advanced Research Infrastructure... 37\n", + "15 EC - 733032 - European Human Biomonitoring Ini... 32\n", + "16 NSF - 1852977 - The Management and Operation o... 31\n", + "17 EC - 776613 - European Climate Prediction system 31\n", + "18 EC - 776816 - Project Ô: demonstration of plan... 30\n", + "19 EC - 812880 - Joint PhD Laboratory for New Mat... 30" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query=\"\"\"SELECT CONCAT_WS(' - ', IF(SIZE(funding.shortName) > 0, ARRAY_JOIN(funding.shortName, ',', '-'), '-'), COALESCE(code, '-'), SUBSTRING(title, 0, 50)) AS project,\n", + " COUNT(*) AS count \n", + "FROM projects JOIN relations ON projects.id = relations.source.id AND reltype.name = 'produces'\n", + "GROUP BY project \n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
organizationcount
0CNRS638
1UH579
2CSIC379
3FHG322
4CNR317
5UCL310
6ETH Zurich300
7MPG299
8THE CHANCELLOR, MASTERS AND SCHOLARS OF THE UN...271
9CEA267
10KUL255
11UOXF249
12DTU209
13Delft University of Technology207
14UCPH203
15Imperial203
16University of Edinburgh181
17Aalto University180
18AU177
19EPFL172
\n", + "
" + ], + "text/plain": [ + " organization count\n", + "0 CNRS 638\n", + "1 UH 579\n", + "2 CSIC 379\n", + "3 FHG 322\n", + "4 CNR 317\n", + "5 UCL 310\n", + "6 ETH Zurich 300\n", + "7 MPG 299\n", + "8 THE CHANCELLOR, MASTERS AND SCHOLARS OF THE UN... 271\n", + "9 CEA 267\n", + "10 KUL 255\n", + "11 UOXF 249\n", + "12 DTU 209\n", + "13 Delft University of Technology 207\n", + "14 UCPH 203\n", + "15 Imperial 203\n", + "16 University of Edinburgh 181\n", + "17 Aalto University 180\n", + "18 AU 177\n", + "19 EPFL 172" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query=\"\"\"SELECT COALESCE(legalshortname, legalname) AS organization, \n", + " COUNT(*) AS count \n", + "FROM organizations JOIN relations ON organizations.id = relations.source.id AND reltype.name = 'isParticipant'\n", + "GROUP BY organization \n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
organizationcount
0UPV4980
1UL4819
2University of Oxford3859
3University of Cambridge3670
4UPC3041
5ULP2855
6AMU2624
7KUL2582
8UB2576
9University of Zagreb2555
10AAU2522
11University of California System2497
12University of Edinburgh2422
13Andalas University2350
14Amsterdam UMC2323
15ETH Zurich2276
16UPM2191
17INRIA2096
18UH2082
19VUA1982
\n", + "
" + ], + "text/plain": [ + " organization count\n", + "0 UPV 4980\n", + "1 UL 4819\n", + "2 University of Oxford 3859\n", + "3 University of Cambridge 3670\n", + "4 UPC 3041\n", + "5 ULP 2855\n", + "6 AMU 2624\n", + "7 KUL 2582\n", + "8 UB 2576\n", + "9 University of Zagreb 2555\n", + "10 AAU 2522\n", + "11 University of California System 2497\n", + "12 University of Edinburgh 2422\n", + "13 Andalas University 2350\n", + "14 Amsterdam UMC 2323\n", + "15 ETH Zurich 2276\n", + "16 UPM 2191\n", + "17 INRIA 2096\n", + "18 UH 2082\n", + "19 VUA 1982" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query=\"\"\"SELECT COALESCE(legalshortname, legalname) AS organization, \n", + " COUNT(*) AS count \n", + "FROM organizations JOIN relations ON organizations.id = relations.source.id AND reltype.name = 'isAuthorInstitutionOf' \n", + "GROUP BY organization\n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
organizationpublicationdatasetsoftwareother
0UPV4974600
1UL449300326
2University of Oxford3711104044
3University of Cambridge346899499
4UPC30236012
5ULP28220033
6AMU25678148
7University of Zagreb25093043
8University of California System24830014
9AAU24701150
10UB243201143
11University of Edinburgh2414116
12Andalas University2342008
13Amsterdam UMC2323000
14UPM2188003
15ETH Zurich21860090
16INRIA20680721
17KUL206001521
18INSERM1954035
19VUA19450037
\n", + "
" + ], + "text/plain": [ + " organization publication dataset software other\n", + "0 UPV 4974 6 0 0\n", + "1 UL 4493 0 0 326\n", + "2 University of Oxford 3711 104 0 44\n", + "3 University of Cambridge 3468 99 4 99\n", + "4 UPC 3023 6 0 12\n", + "5 ULP 2822 0 0 33\n", + "6 AMU 2567 8 1 48\n", + "7 University of Zagreb 2509 3 0 43\n", + "8 University of California System 2483 0 0 14\n", + "9 AAU 2470 1 1 50\n", + "10 UB 2432 0 1 143\n", + "11 University of Edinburgh 2414 1 1 6\n", + "12 Andalas University 2342 0 0 8\n", + "13 Amsterdam UMC 2323 0 0 0\n", + "14 UPM 2188 0 0 3\n", + "15 ETH Zurich 2186 0 0 90\n", + "16 INRIA 2068 0 7 21\n", + "17 KUL 2060 0 1 521\n", + "18 INSERM 1954 0 3 5\n", + "19 VUA 1945 0 0 37" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Per organization, count the linked results of each type (publication / dataset / software / other); top 20 by publications.\n", + "# Each join carries its own ON condition so that no pair of tables is joined without a condition (no Cartesian product).\n", + "query=\"\"\"SELECT COALESCE(legalshortname, legalname) AS organization, \n", + " COUNT(IF(type = 'publication', 1, NULL)) AS publication,\n", + " COUNT(IF(type = 'dataset', 1, NULL)) AS dataset,\n", + " COUNT(IF(type = 'software', 1, NULL)) AS software,\n", + " COUNT(IF(type = 'other', 1, NULL)) AS other\n", + "FROM relations\n", + " JOIN organizations ON organizations.id = relations.source.id\n", + " JOIN results ON results.id = relations.target.id\n", + "WHERE relations.reltype.name = 'isAuthorInstitutionOf'\n", + "GROUP BY organization \n", + "ORDER BY publication DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
organizationopenembargoclosed
0UPV477011199
1UL460367144
2University of Oxford299983721
3UPC26071583
4KUL25181528
5UB24811958
6University of California System2450132
7University of Edinburgh2394110
8Andalas University235000
9ETH Zurich22511210
10University of Zagreb219114113
11UH2060017
12UPM19973511
13ULP19776791
14University of Cambridge194412181
15University of Copenhagen189605
16Amsterdam UMC1870153
17CSIC1585277
18VUA15031055
19UWO1427025
\n", + "
" + ], + "text/plain": [ + " organization open embargo closed\n", + "0 UPV 4770 11 199\n", + "1 UL 4603 67 144\n", + "2 University of Oxford 2999 837 21\n", + "3 UPC 2607 158 3\n", + "4 KUL 2518 15 28\n", + "5 UB 2481 19 58\n", + "6 University of California System 2450 1 32\n", + "7 University of Edinburgh 2394 1 10\n", + "8 Andalas University 2350 0 0\n", + "9 ETH Zurich 2251 12 10\n", + "10 University of Zagreb 2191 141 13\n", + "11 UH 2060 0 17\n", + "12 UPM 1997 35 11\n", + "13 ULP 1977 679 1\n", + "14 University of Cambridge 1944 12 181\n", + "15 University of Copenhagen 1896 0 5\n", + "16 Amsterdam UMC 1870 15 3\n", + "17 CSIC 1585 2 77\n", + "18 VUA 1503 10 55\n", + "19 UWO 1427 0 25" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Per organization, count the linked results by best access right label (OPEN / EMBARGO / CLOSED); top 20 by open count.\n", + "# Each join carries its own ON condition so that no pair of tables is joined without a condition (no Cartesian product).\n", + "query=\"\"\"SELECT COALESCE(legalshortname, legalname) AS organization, \n", + " COUNT(IF(bestaccessright.label = 'OPEN', 1, NULL)) AS open,\n", + " COUNT(IF(bestaccessright.label = 'EMBARGO', 1, NULL)) AS embargo,\n", + " COUNT(IF(bestaccessright.label = 'CLOSED', 1, NULL)) AS closed\n", + "FROM relations\n", + " JOIN organizations ON organizations.id = relations.source.id\n", + " JOIN results ON results.id = relations.target.id\n", + "WHERE relations.reltype.name = 'isAuthorInstitutionOf'\n", + "GROUP BY organization\n", + "ORDER BY open DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryopenembargoclosed
0ES23724309618
1GB210341044994
2DE153563682772
3US11900365577
4FR93481763779
5CH6908136536
6PT622181457
7HR594415735
8BE5636245412
9FI54213543
10IT49891191658
11NL496329240
12DK465156491
13SI464267398
14CO412459155
15ID406009
16SE3690193
17CA345866778
18NO3338256
19TR27591391397
\n", + "
" + ], + "text/plain": [ + " country open embargo closed\n", + "0 ES 23724 309 618\n", + "1 GB 21034 1044 994\n", + "2 DE 15356 368 2772\n", + "3 US 11900 36 5577\n", + "4 FR 9348 176 3779\n", + "5 CH 6908 136 536\n", + "6 PT 6221 814 57\n", + "7 HR 5944 157 35\n", + "8 BE 5636 245 412\n", + "9 FI 5421 35 43\n", + "10 IT 4989 119 1658\n", + "11 NL 4963 29 240\n", + "12 DK 4651 56 491\n", + "13 SI 4642 67 398\n", + "14 CO 4124 59 155\n", + "15 ID 4060 0 9\n", + "16 SE 3690 1 93\n", + "17 CA 3458 66 778\n", + "18 NO 3338 2 56\n", + "19 TR 2759 139 1397" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Per organization country code, count the linked results by best access right label; organizations without a country are skipped.\n", + "# Each join carries its own ON condition so that no pair of tables is joined without a condition (no Cartesian product).\n", + "query=\"\"\"SELECT organizations.country.code AS country, \n", + " COUNT(IF(bestaccessright.label = 'OPEN', 1, NULL)) AS open,\n", + " COUNT(IF(bestaccessright.label = 'EMBARGO', 1, NULL)) AS embargo,\n", + " COUNT(IF(bestaccessright.label = 'CLOSED', 1, NULL)) AS closed\n", + "FROM relations\n", + " JOIN organizations ON organizations.id = relations.source.id\n", + " JOIN results ON results.id = relations.target.id\n", + "WHERE relations.reltype.name = 'isAuthorInstitutionOf'\n", + " AND organizations.country IS NOT NULL\n", + "GROUP BY organizations.country.code\n", + "ORDER BY open DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
leftrightcount
0DEDE12806
1GBGB9955
2DEGB6269
3ITIT5240
4ESES4906
5FRFR4830
6DEIT4683
7DEFR4573
8DEES4472
9NLNL3613
10DENL3427
11GBIT3332
12FRGB3328
13ESGB3195
14GBNL2860
15CHDE2676
16ESIT2665
17FRIT2456
18ESFR2365
19USUS2040
\n", + "
" + ], + "text/plain": [ + " left right count\n", + "0 DE DE 12806\n", + "1 GB GB 9955\n", + "2 DE GB 6269\n", + "3 IT IT 5240\n", + "4 ES ES 4906\n", + "5 FR FR 4830\n", + "6 DE IT 4683\n", + "7 DE FR 4573\n", + "8 DE ES 4472\n", + "9 NL NL 3613\n", + "10 DE NL 3427\n", + "11 GB IT 3332\n", + "12 FR GB 3328\n", + "13 ES GB 3195\n", + "14 GB NL 2860\n", + "15 CH DE 2676\n", + "16 ES IT 2665\n", + "17 FR IT 2456\n", + "18 ES FR 2365\n", + "19 US US 2040" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Count country pairs whose organizations share the same 'isParticipant' relation target (presumably a project - confirm).\n", + "# The self-join uses l.country <= r.country, so same-country pairs are included.\n", + "# NOTE(review): with <=, every row also matches itself and same-country pairs match in both orders, which inflates the same-country counts - confirm this is intended.\n", + "query=\"\"\"WITH countryProject AS (\n", + " SELECT country.code AS country, \n", + " target.id AS id \n", + " FROM organizations JOIN relations ON reltype.name = 'isParticipant' AND source.id = organizations.id\n", + " WHERE country IS NOT NULL\n", + ")\n", + "SELECT l.country AS left, \n", + " r.country AS right,\n", + " COUNT(*) AS count \n", + "FROM countryProject AS l JOIN countryProject AS r ON l.id = r.id AND l.country <= r.country\n", + "GROUP BY left, right \n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
leftrightcount
0DEGB6269
1DEIT4683
2DEFR4573
3DEES4472
4DENL3427
5GBIT3332
6FRGB3328
7ESGB3195
8GBNL2860
9CHDE2676
10ESIT2665
11FRIT2456
12ESFR2365
13CHGB1955
14DESE1804
15BEDE1759
16FRNL1726
17ITNL1708
18ESNL1596
19GBSE1491
\n", + "
" + ], + "text/plain": [ + " left right count\n", + "0 DE GB 6269\n", + "1 DE IT 4683\n", + "2 DE FR 4573\n", + "3 DE ES 4472\n", + "4 DE NL 3427\n", + "5 GB IT 3332\n", + "6 FR GB 3328\n", + "7 ES GB 3195\n", + "8 GB NL 2860\n", + "9 CH DE 2676\n", + "10 ES IT 2665\n", + "11 FR IT 2456\n", + "12 ES FR 2365\n", + "13 CH GB 1955\n", + "14 DE SE 1804\n", + "15 BE DE 1759\n", + "16 FR NL 1726\n", + "17 IT NL 1708\n", + "18 ES NL 1596\n", + "19 GB SE 1491" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Same country-pair count as the previous cell, but l.country < r.country keeps only pairs of two different countries.\n", + "query=\"\"\"WITH countryProject AS (\n", + " SELECT country.code AS country, \n", + " target.id AS id \n", + " FROM organizations JOIN relations ON reltype.name = 'isParticipant' AND source.id = organizations.id\n", + " WHERE country IS NOT NULL\n", + ")\n", + "SELECT l.country AS left, \n", + " r.country AS right, \n", + " COUNT(*) AS count \n", + "FROM countryProject AS l JOIN countryProject AS r ON l.id = r.id AND l.country < r.country\n", + "GROUP BY left, right \n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [], + "source": [ + "# Count pairs of differently named organizations that share the same 'isParticipant' relation target; top 20 pairs.\n", + "query=\"\"\"WITH orgProject AS (\n", + " SELECT COALESCE(legalshortname, legalname) AS organization, \n", + " target.id AS id \n", + " FROM organizations JOIN relations ON reltype.name = 'isParticipant' AND source.id = organizations.id\n", + ")\n", + "SELECT l.organization AS left,\n", + " r.organization AS right,\n", + " COUNT(*) AS count\n", + "FROM orgProject AS l JOIN orgProject AS r ON l.id = r.id AND l.organization < r.organization\n", + "GROUP BY left, right \n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [], + "source": [ + "# Count pairs of differently named organizations that share the same 'isAuthorInstitutionOf' relation target; top 20 pairs.\n", + "# The CTE is named orgResult (not orgProject) because the relation target here is not a project participation.\n", + "query=\"\"\"WITH orgResult 
AS (\n", + " SELECT COALESCE(legalshortname, legalname) AS organization, \n", + " target.id AS id \n", + " FROM organizations JOIN relations ON reltype.name = 'isAuthorInstitutionOf' AND source.id = organizations.id\n", + ")\n", + "SELECT l.organization AS left, \n", + " r.organization AS right,\n", + " COUNT(*) AS count \n", + "FROM orgResult AS l JOIN orgResult AS r ON l.id = r.id AND l.organization < r.organization\n", + "GROUP BY left, right \n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
accessrightyearcount
0OPEN20221391279
1CLOSED2022672566
2EMBARGO202214258
3RESTRICTED202212312
\n", + "
" + ], + "text/plain": [ + " accessright year count\n", + "0 OPEN 2022 1391279\n", + "1 CLOSED 2022 672566\n", + "2 EMBARGO 2022 14258\n", + "3 RESTRICTED 2022 12312" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Count results per best access right label and publication year (first four characters of publicationdate).\n", + "# SUBSTRING positions are 1-based in Spark SQL, so the year is taken starting at position 1.\n", + "query=\"\"\"SELECT bestaccessright.label AS accessright,\n", + " SUBSTRING(publicationdate, 1, 4) AS year,\n", + " COUNT(*) AS count\n", + "FROM results\n", + "WHERE bestaccessright IS NOT NULL AND publicationdate IS NOT NULL\n", + "GROUP BY accessright, year\n", + "ORDER BY count DESC\"\"\"\n", + "spark.sql(query).limit(20).toPandas()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "autoscroll": "auto" + }, + "outputs": [], + "source": [ + "# Count 'IsSupplementedBy' relations whose source is a publication and whose target is a dataset.\n", + "# Each join carries its own ON condition so that no pair of tables is joined without a condition (no Cartesian product).\n", + "query=\"\"\"SELECT COUNT(*) AS count\n", + "FROM relations\n", + " JOIN publications ON publications.id = relations.source.id\n", + " JOIN datasets ON datasets.id = relations.target.id\n", + "WHERE relations.reltype.name = 'IsSupplementedBy'\"\"\"\n", + "spark.sql(query).limit(20).toPandas()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + }, + "name": "openaire_beginners_kit SQL" + }, + "nbformat": 4, + "nbformat_minor": 4 +}