added datasets

master
Andrea Mannocci 3 years ago
parent c052601c90
commit c6d01322c3

2
.gitignore vendored

@ -76,7 +76,7 @@ target/
.ipynb_checkpoints/
# exclude data from source control by default
/data/
# /data/
# Mac OS-specific storage files
.DS_Store

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -0,0 +1,32 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

File diff suppressed because one or more lines are too long

@ -24,27 +24,6 @@ def main(input_filepath, output_filepath):
logger = logging.getLogger(__name__)
logger.info('making final data set from raw data')
with open(os.path.join(input_filepath, 'OpenAIRE_DS_re3data_opendoar.json'), mode='r') as f:
with open(os.path.join(output_filepath, 're3data_opendoar.csv'), mode='w') as csvfile:
csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
csv_writer.writerow(['id', 'url', 'official_name', 'english_name', 'description', 'latitude', 'longitude', 'subjects'])
for line in f:
repo = json.loads(line)
identifier = repo['id']
official_name = repo['officialname']['value']
url = get_value_or_none(repo, 'websiteurl')
english_name = get_value_or_none(repo, 'englishname')
description = get_value_or_none(repo, 'description')
latitude = get_value_or_none(repo, 'latitude')
longitude = get_value_or_none(repo, 'longitude')
subjects = []
for s in repo['subjects']:
subjects.append(s['value'])
csv_writer.writerow([identifier, url, official_name, english_name, description, latitude, longitude, subjects])
if __name__ == '__main__':
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

Loading…
Cancel
Save