recreating dataframes

This commit is contained in:
Andrea Mannocci 2021-10-07 09:37:21 +02:00
parent da8f0818df
commit 9dfedb2a7b
4 changed files with 2230 additions and 2230 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1096,7 +1096,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 32,
"metadata": {},
"outputs": [
{
@ -1150,9 +1150,9 @@
" <td>[\"zh\", \"en\"]</td>\n",
" <td>2021-03-25 10:16:18</td>\n",
" <td>2005-12-21 12:44:08</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"bibliographic_references\", \"theses_and_disse...</td>\n",
" <td>[{\"name\": \"university of hong kong\", \"alternat...</td>\n",
" <td>[multidisciplinary]</td>\n",
" <td>[bibliographic_references, theses_and_disserta...</td>\n",
" <td>[{'name': 'university of hong kong', 'alternat...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap...</td>\n",
" <td>NaN</td>\n",
@ -1169,9 +1169,9 @@
" <td>[\"cs\", \"en\", \"hu\", \"ru\"]</td>\n",
" <td>2021-03-25 09:48:31</td>\n",
" <td>2006-01-04 14:59:30</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"unpub_reports_and_working_papers\"]</td>\n",
" <td>[{\"name\": \"central european university\", \"alte...</td>\n",
" <td>[multidisciplinary]</td>\n",
" <td>[unpub_reports_and_working_papers]</td>\n",
" <td>[{'name': 'central european university', 'alte...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"eprints\", \"version\": \"2.2.1\"}</td>\n",
" <td>http://rss.archives.ceu.hu/perl/oai2</td>\n",
@ -1188,9 +1188,9 @@
" <td>[\"nl\", \"en\", \"fr\", \"de\", \"it\"]</td>\n",
" <td>2021-09-13 13:35:36</td>\n",
" <td>2006-01-04 12:07:07</td>\n",
" <td>[\"history and archaeology\", \"multidisciplinary...</td>\n",
" <td>[\"journal_articles\", \"theses_and_dissertations...</td>\n",
" <td>[{\"name\": \"european university institute\", \"al...</td>\n",
" <td>[history and archaeology, multidisciplinary, s...</td>\n",
" <td>[journal_articles, theses_and_dissertations, u...</td>\n",
" <td>[{'name': 'european university institute', 'al...</td>\n",
" <td>[{\"policy_url\": \"https://www.eui.eu/research/e...</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"5.2\"}</td>\n",
" <td>http://cadmus.eui.eu/oai/request</td>\n",
@ -1207,9 +1207,9 @@
" <td>[\"nl\", \"en\", \"fr\", \"de\"]</td>\n",
" <td>2021-04-16 15:23:52</td>\n",
" <td>2006-01-24 15:46:44</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"journal_articles\", \"conference_and_workshop_...</td>\n",
" <td>[{\"name\": \"uhasselt\", \"alternativeName\": \"hass...</td>\n",
" <td>[multidisciplinary]</td>\n",
" <td>[journal_articles, conference_and_workshop_pap...</td>\n",
" <td>[{'name': 'uhasselt', 'alternativeName': 'hass...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"1.7.2\"}</td>\n",
" <td>http://doclib.uhasselt.be/dspace-oai/request</td>\n",
@ -1226,9 +1226,9 @@
" <td>[\"nl\", \"en\"]</td>\n",
" <td>2021-04-16 15:22:03</td>\n",
" <td>2006-01-13 12:55:13</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"journal_articles\", \"conference_and_workshop_...</td>\n",
" <td>[{\"name\": \"university of utrecht\", \"alternativ...</td>\n",
" <td>[multidisciplinary]</td>\n",
" <td>[journal_articles, conference_and_workshop_pap...</td>\n",
" <td>[{'name': 'university of utrecht', 'alternativ...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"\"}</td>\n",
" <td>https://dspace.library.uu.nl/oai/request</td>\n",
@ -1303,25 +1303,25 @@
"4 2006-01-13 12:55:13 \n",
"\n",
" OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
"0 [\"multidisciplinary\"] \n",
"1 [\"multidisciplinary\"] \n",
"2 [\"history and archaeology\", \"multidisciplinary... \n",
"3 [\"multidisciplinary\"] \n",
"4 [\"multidisciplinary\"] \n",
"0 [multidisciplinary] \n",
"1 [multidisciplinary] \n",
"2 [history and archaeology, multidisciplinary, s... \n",
"3 [multidisciplinary] \n",
"4 [multidisciplinary] \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [\"bibliographic_references\", \"theses_and_disse... \n",
"1 [\"unpub_reports_and_working_papers\"] \n",
"2 [\"journal_articles\", \"theses_and_dissertations... \n",
"3 [\"journal_articles\", \"conference_and_workshop_... \n",
"4 [\"journal_articles\", \"conference_and_workshop_... \n",
"0 [bibliographic_references, theses_and_disserta... \n",
"1 [unpub_reports_and_working_papers] \n",
"2 [journal_articles, theses_and_dissertations, u... \n",
"3 [journal_articles, conference_and_workshop_pap... \n",
"4 [journal_articles, conference_and_workshop_pap... \n",
"\n",
" OpenDOAR_organization \\\n",
"0 [{\"name\": \"university of hong kong\", \"alternat... \n",
"1 [{\"name\": \"central european university\", \"alte... \n",
"2 [{\"name\": \"european university institute\", \"al... \n",
"3 [{\"name\": \"uhasselt\", \"alternativeName\": \"hass... \n",
"4 [{\"name\": \"university of utrecht\", \"alternativ... \n",
"0 [{'name': 'university of hong kong', 'alternat... \n",
"1 [{'name': 'central european university', 'alte... \n",
"2 [{'name': 'european university institute', 'al... \n",
"3 [{'name': 'uhasselt', 'alternativeName': 'hass... \n",
"4 [{'name': 'university of utrecht', 'alternativ... \n",
"\n",
" OpenDOAR_policy_urls \\\n",
"0 [] \n",
@ -1345,17 +1345,17 @@
"4 https://dspace.library.uu.nl/oai/request OpenDOAR_101 "
]
},
"execution_count": 4,
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"opendoar_df = pd.read_csv('../data/raw/openDoar.tsv', delimiter='\\t',\n",
" converters={'subject': ast.literal_eval,\n",
" 'alternativeNames': ast.literal_eval,\n",
" 'contentType': ast.literal_eval,\n",
" 'institution': ast.literal_eval\n",
" converters={'repository_metadata.content_subjects_phrases': ast.literal_eval,\n",
" 'repository_metadata.alternativename': ast.literal_eval,\n",
" 'repository_metadata.content_types': ast.literal_eval,\n",
" 'organization': ast.literal_eval\n",
" },\n",
" dtype={'system_metadata.id': str})\n",
"\n",