# -*- coding: utf-8 -*-
import csv
import json
import logging
import os
from pathlib import Path

import click
from dotenv import find_dotenv, load_dotenv

# File names are fixed; only the containing directories come from the CLI.
INPUT_FILENAME = 'OpenAIRE_DS_re3data_opendoar.json'
OUTPUT_FILENAME = 're3data_opendoar.csv'

# Column order of the generated CSV; must match the row built in main().
CSV_HEADER = ['id', 'url', 'official_name', 'english_name', 'description',
              'latitude', 'longitude', 'subjects']


def get_value_or_none(obj, key):
    """Return ``obj[key]['value']``, or ``None`` when the field is unset.

    Args:
        obj: Mapping decoded from one JSON record.
        key: Name of the field to look up in ``obj``.

    Returns:
        The ``'value'`` entry of the field, or ``None`` if ``key`` is
        missing from ``obj`` or is present but null.

    Raises:
        KeyError: If the field is present but has no ``'value'`` entry.
    """
    entry = obj.get(key)
    if entry is None:
        # Covers both an absent key and an explicit JSON null.
        return None
    return entry['value']


# main() reads <input_filepath>/OpenAIRE_DS_re3data_opendoar.json as JSON
# lines (one object per line) and writes <output_filepath>/re3data_opendoar.csv
# with one row per record. 'id' and 'officialname' are required in every
# record (KeyError otherwise); the remaining fields are optional.
# The docstring below is left as-is because click shows it as --help text.
@click.command()
@click.argument('input_filepath', type=click.Path(exists=True))
@click.argument('output_filepath', type=click.Path())
def main(input_filepath, output_filepath):
    """ Runs data processing scripts to turn raw data from (../raw) into
        cleaned data ready to be analyzed (saved in ../processed).
    """
    logger = logging.getLogger(__name__)
    logger.info('making final data set from raw data')

    source_path = os.path.join(input_filepath, INPUT_FILENAME)
    target_path = os.path.join(output_filepath, OUTPUT_FILENAME)

    # Explicit UTF-8 keeps non-ASCII names/descriptions independent of the
    # locale; newline='' is required by the csv module so that it controls
    # line endings itself (no doubled '\r' on Windows, embedded newlines
    # inside quoted fields survive).
    with open(source_path, mode='r', encoding='utf-8') as f, \
            open(target_path, mode='w', encoding='utf-8',
                 newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(CSV_HEADER)

        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            repo = json.loads(line)

            # A missing or null 'subjects' field is treated as "no subjects",
            # in line with the other optional fields.
            subjects = [s['value'] for s in repo.get('subjects') or []]

            # NOTE: csv.writer stringifies non-string cells with str(), so
            # the 'subjects' column holds the Python list representation.
            # Kept as-is to preserve the existing output format.
            csv_writer.writerow([
                repo['id'],
                get_value_or_none(repo, 'websiteurl'),
                repo['officialname']['value'],
                get_value_or_none(repo, 'englishname'),
                get_value_or_none(repo, 'description'),
                get_value_or_none(repo, 'latitude'),
                get_value_or_none(repo, 'longitude'),
                subjects,
            ])


if __name__ == '__main__':
    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_fmt)

    # not used in this stub but often useful for finding various files
    project_dir = Path(__file__).resolve().parents[2]

    # find .env automagically by walking up directories until it's found, then
    # load up the .env entries as environment variables
    load_dotenv(find_dotenv())

    main()