61 lines
2.3 KiB
Python
61 lines
2.3 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
import csv
|
||
|
import os
|
||
|
import json
|
||
|
import click
|
||
|
import logging
|
||
|
from pathlib import Path
|
||
|
from dotenv import find_dotenv, load_dotenv
|
||
|
|
||
|
|
||
|
def get_value_or_none(obj, key):
    """Return the ``'value'`` field of ``obj[key]``, or ``None`` if unavailable.

    Args:
        obj: Mapping whose entries are themselves mappings holding a
            ``'value'`` item (e.g. one decoded JSON record).
        key: Name of the entry to look up in ``obj``.

    Returns:
        ``obj[key]['value']`` when the entry exists, otherwise ``None``.
        ``None`` is also returned when the entry is present but is itself
        ``None`` (a JSON ``null``), instead of failing with ``TypeError``.

    Raises:
        KeyError: If the entry exists but has no ``'value'`` item.
    """
    entry = obj.get(key)
    if entry is None:
        # Covers both a missing key and an explicit null entry.
        return None
    return entry['value']
|
||
|
|
||
|
@click.command()
@click.argument('input_filepath', type=click.Path(exists=True))
@click.argument('output_filepath', type=click.Path())
def main(input_filepath, output_filepath):
    """Convert the raw repository dump into a flat CSV file.

    Reads 'OpenAIRE_DS_re3data_opendoar.json' (one JSON object per line)
    from the INPUT_FILEPATH directory and writes 're3data_opendoar.csv'
    into the OUTPUT_FILEPATH directory, one row per JSON record. Blank
    input lines are skipped. A record lacking 'id', 'officialname' or
    'subjects' raises KeyError; the other columns are left empty when
    the corresponding field is absent.
    """
    logger = logging.getLogger(__name__)
    logger.info('making final data set from raw data')

    source_path = os.path.join(input_filepath, 'OpenAIRE_DS_re3data_opendoar.json')
    target_path = os.path.join(output_filepath, 're3data_opendoar.csv')

    # Explicit UTF-8 keeps the result independent of the platform's default
    # encoding; newline='' is what the csv module requires so that it can
    # control line endings itself.
    with open(source_path, mode='r', encoding='utf-8') as f, \
            open(target_path, mode='w', encoding='utf-8', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(['id', 'url', 'official_name', 'english_name', 'description', 'latitude', 'longitude', 'subjects'])

        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            repo = json.loads(line)

            # Mandatory fields: a record without them is an error.
            identifier = repo['id']
            official_name = repo['officialname']['value']

            # Optional fields: None is written as an empty CSV cell.
            url = get_value_or_none(repo, 'websiteurl')
            english_name = get_value_or_none(repo, 'englishname')
            description = get_value_or_none(repo, 'description')
            latitude = get_value_or_none(repo, 'latitude')
            longitude = get_value_or_none(repo, 'longitude')

            # The list is handed to the writer as-is, so the cell contains
            # its str() representation (kept for output compatibility).
            subjects = [s['value'] for s in repo['subjects']]

            csv_writer.writerow([identifier, url, official_name, english_name, description, latitude, longitude, subjects])
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Configure root logging before anything else emits a record.
    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=log_fmt, level=logging.INFO)

    # Directory two levels above the one containing this file; unused here
    # but kept as a handy anchor for locating other project files.
    this_file = Path(__file__).resolve()
    project_dir = this_file.parents[2]

    # Search upwards through parent directories for a .env file and export
    # its entries as environment variables.
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)

    main()
|