added some network analysis
This commit is contained in:
parent
673fa5f9b5
commit
549b1477f2
|
@ -597,13 +597,14 @@
|
|||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"SELECT COALESCE(legalshortname, legalname) AS organization, \n",
|
||||
" COUNT(*) AS total,\n",
|
||||
" COUNT(IF(type = 'publication', 1, NULL)) AS publication,\n",
|
||||
" COUNT(IF(type = 'dataset', 1, NULL)) AS dataset,\n",
|
||||
" COUNT(IF(type = 'software', 1, NULL)) AS software,\n",
|
||||
" COUNT(IF(type = 'other', 1, NULL)) AS other\n",
|
||||
"FROM results JOIN organizations JOIN relations ON organizations.id = relations.source.id AND results.id = relations.target.id AND reltype.name = 'isAuthorInstitutionOf' \n",
|
||||
"GROUP BY organization \n",
|
||||
"ORDER BY publication DESC\n",
|
||||
"ORDER BY total DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
|
@ -627,12 +628,13 @@
|
|||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"SELECT COALESCE(legalshortname, legalname) AS organization, \n",
|
||||
" COUNT(*) as total,\n",
|
||||
" COUNT(IF(bestaccessright.label = 'OPEN', 1, NULL)) AS open,\n",
|
||||
" COUNT(IF(bestaccessright.label = 'EMBARGO', 1, NULL)) AS embargo,\n",
|
||||
" COUNT(IF(bestaccessright.label = 'CLOSED', 1, NULL)) AS closed\n",
|
||||
"FROM organizations JOIN relations JOIN results ON organizations.id = relations.source.id AND results.id = relations.target.id AND reltype.name = 'isAuthorInstitutionOf'\n",
|
||||
"GROUP BY organization\n",
|
||||
"ORDER BY open DESC\n",
|
||||
"ORDER BY total DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
|
@ -656,13 +658,14 @@
|
|||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"SELECT organizations.country.code AS country, \n",
|
||||
" COUNT(*) AS total,\n",
|
||||
" COUNT(IF(bestaccessright.label = 'OPEN', 1, NULL)) AS open,\n",
|
||||
" COUNT(IF(bestaccessright.label = 'EMBARGO', 1, NULL)) AS embargo,\n",
|
||||
" COUNT(IF(bestaccessright.label = 'CLOSED', 1, NULL)) AS closed\n",
|
||||
"FROM organizations JOIN relations JOIN results ON organizations.id = relations.source.id AND results.id = relations.target.id AND reltype.name = 'isAuthorInstitutionOf'\n",
|
||||
"WHERE organizations.country IS NOT NULL\n",
|
||||
"GROUP BY organizations.country.code\n",
|
||||
"ORDER BY open DESC\n",
|
||||
"ORDER BY total DESC\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"spark.sql(query).limit(20).toPandas()"
|
||||
|
@ -702,6 +705,108 @@
|
|||
"spark.sql(query).limit(20).toPandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import igraph as ig\n",
|
||||
"\n",
|
||||
"G = ig.Graph.TupleList(\n",
|
||||
" edges=edges[['left', 'right', 'count']].values,\n",
|
||||
" vertex_name_attr='countrycode',\n",
|
||||
" edge_attrs = ['weight'],\n",
|
||||
" directed=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"G.vcount()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"G.ecount()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"G.vs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"G.es[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"fig, ax = plt.subplots()\n",
|
||||
"ig.plot(G, vertex_label=G.vs['countrycode'], target=ax)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"G.vs.find(countrycode_eq = 'MY') # Malaysia ('MY' is the ISO 3166-1 alpha-2 code for Malaysia; Maldives would be 'MV')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"H = G.induced_subgraph(G.neighborhood(50))\n",
|
||||
"H.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"H.vs['color'] = 'grey'\n",
|
||||
"H.vs[0]['color'] = 'red'\n",
|
||||
"fig, ax = plt.subplots()\n",
|
||||
"ig.plot(H, target=ax)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"G.transitivity_local_undirected(50)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
|
|
Loading…
Reference in New Issue