moved cells
This commit is contained in:
parent
a4c6e5321b
commit
95e2f4a8ba
|
@ -569,6 +569,115 @@
|
|||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<div class=\"alert alert-info\">\n",
|
||||
"Show the journals in which the highest number of results were published.\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"WITH journals AS (\n",
|
||||
" SELECT container.*\n",
|
||||
" FROM publications\n",
|
||||
" WHERE container IS NOT NULL\n",
|
||||
")\n",
|
||||
"SELECT name, count(*) AS count \n",
|
||||
"FROM journals \n",
|
||||
"GROUP BY name \n",
|
||||
"ORDER BY count DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<div class=\"alert alert-info\">\n",
|
||||
"Show the number of projects per organization; sort results in descending order; limit to the first 20.\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"SELECT COALESCE(legalshortname, legalname) AS organization, \n",
|
||||
" COUNT(*) AS count \n",
|
||||
"FROM organizations JOIN relations ON organizations.id = relations.source.id AND reltype.name = 'isParticipant'\n",
|
||||
"GROUP BY organization \n",
|
||||
"ORDER BY count DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<div class=\"alert alert-info\">\n",
|
||||
"Show projects with the highest number of associated results. \n",
|
||||
"\n",
|
||||
"Note: An \"unidentified\" project is a placeholder for all the associations to a funder where the specific project is not known. It should be excluded from the count.\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"SELECT funding.shortName, code, title, COUNT(*) AS count \n",
|
||||
"FROM projects JOIN relations ON projects.id = relations.source.id AND reltype.name = 'produces' AND not projects.title ilike '%unidentified%' \n",
|
||||
"GROUP BY funding.shortName, code, title\n",
|
||||
"ORDER BY count DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Strings can also be manipulated on the fly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"SELECT CONCAT_WS(' / ', \n",
|
||||
" IF(SIZE(funding.shortName) > 0, ARRAY_JOIN(funding.shortName, ',', '-'), '?'), \n",
|
||||
" COALESCE(code, '?'), \n",
|
||||
" SUBSTRING(title, 0, 50)) AS project, COUNT(*) AS count \n",
|
||||
"FROM projects JOIN relations ON projects.id = relations.source.id AND reltype.name = 'produces' AND NOT projects.title ilike '%unidentified%' \n",
|
||||
"GROUP BY project \n",
|
||||
"ORDER BY count DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
|
@ -610,128 +719,6 @@
|
|||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<div class=\"alert alert-info\">\n",
|
||||
"Show the journals in which the highest number of results were published.\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"autoscroll": "auto",
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"WITH journals AS (\n",
|
||||
" SELECT container.*\n",
|
||||
" FROM publications\n",
|
||||
" WHERE container IS NOT NULL\n",
|
||||
")\n",
|
||||
"SELECT name, count(*) AS count \n",
|
||||
"FROM journals \n",
|
||||
"GROUP BY name \n",
|
||||
"ORDER BY count DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<div class=\"alert alert-info\">\n",
|
||||
"Show the number of projects per organization; sort results in descending order; limit to the first 20.\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"autoscroll": "auto"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"SELECT COALESCE(legalshortname, legalname) AS organization, \n",
|
||||
" COUNT(*) AS count \n",
|
||||
"FROM organizations JOIN relations ON organizations.id = relations.source.id AND reltype.name = 'isParticipant'\n",
|
||||
"GROUP BY organization \n",
|
||||
"ORDER BY count DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"<div class=\"alert alert-info\">\n",
|
||||
"Show projects with the highest number of associated results. \n",
|
||||
"\n",
|
||||
"Note: An \"unidentified\" project is a placeholder for all the associations to a funder where the specific project is not known. It should be excluded from the count.\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"SELECT funding.shortName, code, title, COUNT(*) AS count \n",
|
||||
"FROM projects JOIN relations ON projects.id = relations.source.id AND reltype.name = 'produces' AND not projects.title ilike '%unidentified%' \n",
|
||||
"GROUP BY funding.shortName, code, title\n",
|
||||
"ORDER BY count DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Strings can also be manipulated on the fly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"autoscroll": "auto"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"SELECT CONCAT_WS(' / ', \n",
|
||||
" IF(SIZE(funding.shortName) > 0, ARRAY_JOIN(funding.shortName, ',', '-'), '?'), \n",
|
||||
" COALESCE(code, '?'), \n",
|
||||
" SUBSTRING(title, 0, 50)) AS project, COUNT(*) AS count \n",
|
||||
"FROM projects JOIN relations ON projects.id = relations.source.id AND reltype.name = 'produces' AND NOT projects.title ilike '%unidentified%' \n",
|
||||
"GROUP BY project \n",
|
||||
"ORDER BY count DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
|
|
Loading…
Reference in New Issue