added datasets
This commit is contained in:
parent
c052601c90
commit
c6d01322c3
|
@ -76,7 +76,7 @@ target/
|
|||
.ipynb_checkpoints/
|
||||
|
||||
# exclude data from source control by default
|
||||
/data/
|
||||
# /data/
|
||||
|
||||
# Mac OS-specific storage files
|
||||
.DS_Store
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -24,27 +24,6 @@ def main(input_filepath, output_filepath):
|
|||
logger = logging.getLogger(__name__)
|
||||
logger.info('making final data set from raw data')
|
||||
|
||||
with open(os.path.join(input_filepath, 'OpenAIRE_DS_re3data_opendoar.json'), mode='r') as f:
|
||||
with open(os.path.join(output_filepath, 're3data_opendoar.csv'), mode='w') as csvfile:
|
||||
csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
|
||||
csv_writer.writerow(['id', 'url', 'official_name', 'english_name', 'description', 'latitude', 'longitude', 'subjects'])
|
||||
|
||||
for line in f:
|
||||
repo = json.loads(line)
|
||||
identifier = repo['id']
|
||||
official_name = repo['officialname']['value']
|
||||
url = get_value_or_none(repo, 'websiteurl')
|
||||
english_name = get_value_or_none(repo, 'englishname')
|
||||
description = get_value_or_none(repo, 'description')
|
||||
latitude = get_value_or_none(repo, 'latitude')
|
||||
longitude = get_value_or_none(repo, 'longitude')
|
||||
|
||||
subjects = []
|
||||
for s in repo['subjects']:
|
||||
subjects.append(s['value'])
|
||||
|
||||
csv_writer.writerow([identifier, url, official_name, english_name, description, latitude, longitude, subjects])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
|
|
Loading…
Reference in New Issue