added datasets

2021-07-22 11:03:05 +02:00 · 2021-07-22 11:03:05 +02:00 · c6d01322c3
parent c052601c90
commit c6d01322c3
13 changed files with 40897 additions and 268 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -76,7 +76,7 @@ target/
 .ipynb_checkpoints/

 # exclude data from source control by default
-/data/
+# /data/

 # Mac OS-specific storage files
 .DS_Store
--- a/data/external/.gitkeep
+++ b/data/external/.gitkeep
--- a/data/interim/.gitkeep
+++ b/data/interim/.gitkeep
--- a/data/processed/.gitkeep
+++ b/data/processed/.gitkeep
--- a/data/raw/.gitkeep
+++ b/data/raw/.gitkeep
--- a/data/raw/FAIRsharingDBrec_summary20210304.csv
+++ b/data/raw/FAIRsharingDBrec_summary20210304.csv
--- a/data/raw/export_roar_CSV.csv
+++ b/data/raw/export_roar_CSV.csv
--- a/data/raw/openDoar.tsv
+++ b/data/raw/openDoar.tsv
--- a/data/raw/re3data.tsv
+++ b/data/raw/re3data.tsv
--- a/notebooks/01.1-explorative.ipynb
+++ b/notebooks/01.1-explorative.ipynb
--- a/notebooks/Untitled.ipynb
+++ b/notebooks/Untitled.ipynb
@@ -0,0 +1,32 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/notebooks/[deprecated]-Explorative.ipynb
+++ b/notebooks/[deprecated]-Explorative.ipynb
--- a/src/data/make_dataset.py
+++ b/src/data/make_dataset.py
@@ -24,27 +24,6 @@ def main(input_filepath, output_filepath):
    logger = logging.getLogger(__name__)
    logger.info('making final data set from raw data')
    
-    with open(os.path.join(input_filepath, 'OpenAIRE_DS_re3data_opendoar.json'), mode='r') as f:
-        with open(os.path.join(output_filepath, 're3data_opendoar.csv'), mode='w') as csvfile:
-            csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
-            csv_writer.writerow(['id', 'url', 'official_name', 'english_name', 'description', 'latitude', 'longitude', 'subjects'])
-        
-            for line in f:
-                repo = json.loads(line)
-                identifier = repo['id']
-                official_name = repo['officialname']['value']
-                url = get_value_or_none(repo, 'websiteurl')
-                english_name = get_value_or_none(repo, 'englishname')
-                description = get_value_or_none(repo, 'description')
-                latitude = get_value_or_none(repo, 'latitude')
-                longitude = get_value_or_none(repo, 'longitude')
-
-                subjects = []
-                for s in repo['subjects']:
-                    subjects.append(s['value'])
-
-                csv_writer.writerow([identifier, url, official_name, english_name, description, latitude, longitude, subjects])
-

 if __name__ == '__main__':
    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'