data4impact/apps/data4impact-import-scripts/scripts/project_portfolios_metrics/script.sh

195 lines
12 KiB
Bash
Executable File

#!/bin/bash
#
# Recreates a scratch PostgreSQL database from schema.sql and fills the pp_*
# tables from a statistics_on_release.json export.
#
# Usage: script.sh [JSON_FILE [DB_NAME]]
#   JSON_FILE  export to import (default: the November 2018 release path below)
#   DB_NAME    database that is dropped and recreated (default: metrics_tmp)
#
# schema.sql is resolved against the current working directory.
# Requires jq, sed and the PostgreSQL client tools dropdb, createdb and psql.
# The first failing step aborts the run with exit status 1.

file=${1:-/Users/claudio/workspace/data/d4i/november2018/D4I_Metrics_ARC_Release04_WP52_31Nov2018/statistics_on_release.json}
db=${2:-metrics_tmp}

# jq programs. They build the complete row list before printing anything, so
# a jq error yields no rows at all rather than a partial table.

# One row per top-level key: the key followed by its scalar metrics.
metrics_filter='to_entries | map([.key, .value.eu_contribution, .value.number_of_innovations, .value.number_of_companies_founded, .value.number_of_patents, .value.number_of_projects, .value.number_of_pubmed_publications, .value.number_of_rest_publications, .value.number_of_segments, .value.total_cost]) | .[] | @csv'

# {top: {$field: {k: v}}}  ->  rows [top, k, v]
flat_filter='to_entries | (map([.key, (.value[$field] | to_entries | map([.key, .value] ))]) ) | .[] | to_entries | .[0].value as $id | .[1].value | map([$id,.[0],.[1]]) | .[] | @csv'

# {top: {$field: {k1: {k2: v}}}}  ->  rows [top, k1, k2, v]
nested_filter='to_entries | (map([.key, (.value[$field] | to_entries | map([.key, (.value | to_entries | map([.key, .value ]))]))])) | .[] | to_entries | .[0].value as $id | .[1].value[] | to_entries | .[0].value as $x | .[1].value | map([$id,$x,.[0],.[1]]) | .[] | @csv'

#######################################
# Prints a message to stderr and terminates the script.
# Arguments: $* - message text
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Runs one jq program over the export and COPYs the resulting CSV rows into
# a table.
# Globals:   file (read), db (read)
# Arguments: $1 - target table
#            $2 - comma-separated column list for COPY
#            $3 - jq program producing @csv rows
#            $4 - value bound to $field inside the jq program (optional)
# Outputs:   progress line and a trailing blank line on stdout
#######################################
load_table() {
  local table=$1 columns=$2 filter=$3 field=${4:-}
  echo "Importing table $table"
  (
    # pipefail is scoped to this subshell so that a jq or sed failure is not
    # hidden behind a successful (empty) COPY.
    set -o pipefail
    # The sed step rewrites the first quoted string "null" on each row to -1.
    # NOTE(review): only the first match per row is replaced (no g flag);
    # kept as-is because changing it would alter the loaded data. Confirm
    # whether rows with several "null" strings should be fully rewritten.
    jq -r --arg field "$field" "$filter" "$file" \
      | sed -e 's/"null"/-1/' \
      | psql -d "$db" -c "COPY ${table}(${columns}) FROM STDIN CSV"
  ) || die "import of table $table failed"
  echo
}

# import_flat FIELD COLUMNS: loads table pp_FIELD from a one-level object.
import_flat() {
  load_table "pp_$1" "$2" "$flat_filter" "$1"
}

# import_nested FIELD COLUMNS: loads table pp_FIELD from a two-level object.
import_nested() {
  load_table "pp_$1" "$2" "$nested_filter" "$1"
}

for tool in jq sed psql dropdb createdb; do
  command -v "$tool" >/dev/null || die "required tool not found: $tool"
done
[[ -r "$file" ]] || die "cannot read input file: $file"

echo "Recreating the database $db"
dropdb --if-exists "$db" || die "could not drop database $db"
createdb "$db" || die "could not create database $db"
# Without ON_ERROR_STOP psql keeps going after a failed statement and still
# exits 0, which would leave a half-built schema unnoticed.
psql -v ON_ERROR_STOP=1 -d "$db" -f schema.sql || die "could not load schema.sql into $db"
echo

load_table pp_metrics \
  'id,eu_contribution,number_of_innovations,number_of_companies_founded,number_of_patents,number_of_projects,number_of_pubmed_publications,number_of_rest_publications,number_of_segments,total_cost' \
  "$metrics_filter"

import_nested countries_cooccurrences 'funding,country1,country2,number'
import_flat eu_contribution_per_country 'funding,country,contribution'
import_flat eu_contribution_per_participant_sector 'funding,sector,contribution'
import_flat eu_contribution_per_research_area 'funding,area,contribution'
import_nested eu_contribution_per_research_area_over_time 'funding,year,area,contribution'
import_flat eu_contribution_per_year 'funding,year,contribution'
import_flat number_of_innovations_per_type 'funding,type,number'
import_nested number_of_innovations_per_type_per_country 'funding,country,type,number'
import_nested number_of_innovations_per_type_per_research_area 'funding,area,type,number'
import_flat number_of_patents_per_research_area 'funding,area,number'
import_flat number_of_projects_per_research_area 'funding,area,number'
import_flat number_of_pubmed_publications_per_country 'funding,country,number'
import_flat number_of_pubmed_publications_per_journal 'funding,journal,number'
import_nested number_of_pubmed_publications_per_journal_per_research_area 'funding,journal,area,number'
import_nested number_of_pubmed_publications_per_journal_per_year 'funding,journal,year,number'
import_flat number_of_pubmed_publications_per_research_area 'funding,area,number'
import_flat number_of_pubmed_publications_per_year 'funding,year,number'
import_nested number_of_pubmed_publications_per_year_per_journal 'funding,year,journal,number'
import_flat number_of_rest_publications_per_research_area 'funding,area,number'
import_flat number_of_rest_publications_per_year 'funding,year,number'
import_nested research_areas_cooccurrences 'funding,area1,area2,number'
import_flat research_areas_to_icd10 'funding,area,icd10'
import_flat total_cost_per_research_area 'funding,area,cost'
import_nested total_cost_per_research_area_over_time 'funding,year,area,cost'
import_flat total_cost_per_year 'funding,year,cost'