data4impact/apps/data4impact-import-scripts/scripts/ec_projects_cordis/cordis.sh

57 lines
2.6 KiB
Bash
Executable File

#!/bin/bash
#
# cordis.sh - rebuild the local "cordis" PostgreSQL database from the CORDIS
# FP7 and H2020 organization spreadsheets and export it as JSON files.
#
# Steps: download the .xlsx files, convert them to CSV, recreate the database
# from schema.sql, load the CSVs into the participants table, then run the
# *2json.sql scripts.
#
# Uses wget, xlsx2csv and the PostgreSQL client tools (dropdb, createdb, psql).
# The SQL scripts and the JSON output directory are referenced by relative
# path, so the script has to be started from the directory they are relative to.

urlOrgFp7=http://cordis.europa.eu/data/cordis-fp7organizations.xlsx
urlOrgH2020=http://cordis.europa.eu/data/cordis-h2020organizations.xlsx
workdir=/tmp/cordis

# Start every run from an empty scratch directory.
rm -rf "$workdir" && mkdir "$workdir"

# Download one spreadsheet.
#   $1 - source URL
#   $2 - destination path
# wget -O creates the destination even when the transfer fails, so a failed
# download is reported on stderr and its leftover file is removed instead of
# being handed to the converter as if it were a real spreadsheet.
fetch_xlsx() {
  local url=$1 target=$2
  if ! wget "$url" -O "$target" -q --show-progress; then
    echo "   ! download failed: $url" >&2
    rm -f "$target"
  fi
}

# Convert one spreadsheet to a UTF-8 CSV file.
#   $1 - input .xlsx path
#   $2 - output .csv path
# The redirection creates the CSV before xlsx2csv even starts; when xlsx2csv
# exits non-zero the file is removed again so the file checks before the
# import see a missing file rather than an empty or truncated one.
xlsx_to_csv() {
  local xlsx=$1 csv=$2
  if ! xlsx2csv -c UTF-8 "$xlsx" > "$csv"; then
    echo "   ! conversion failed: $xlsx" >&2
    rm -f "$csv"
  fi
}

echo
echo "cordis Import:"
#--------------------------------
echo " - Downloading files"
fetch_xlsx "$urlOrgFp7" "$workdir/fp7orgs.xlsx"
fetch_xlsx "$urlOrgH2020" "$workdir/h2020orgs.xlsx"
#--------------------------------
echo " - Generating csv files"
csvfp7="$workdir/fp7orgs.csv"
csvh2020="$workdir/h2020orgs.csv"
xlsx_to_csv "$workdir/fp7orgs.xlsx" "$csvfp7"
xlsx_to_csv "$workdir/h2020orgs.xlsx" "$csvh2020"
#--------------------------------
echo " - Recreating the cordis database"
# Drop and recreate the database so every run starts from the schema alone.
dropdb cordis --if-exists
createdb cordis
psql cordis -f schema.sql

# Columns of the participants table, in the order they are filled from the CSV.
participant_columns="projectrcn,projectiD,projectacronym,role,orgid,orgname,orgshortname,activitytype,endofparticipation,eccontribution,country,street,city,postCode,organizationurl,vatnumber,contactform,contacttype,contacttitle,contactfirstnames,contactlastnames,contactfunction,contacttelephonenumber,contactfaxnumber"

# Load one CSV file into participants and tag the freshly loaded rows.
#   $1 - funding programme label written to fundingprogram (FP7 or H2020)
#   $2 - path of the CSV file to load
# The CSV is produced through a shell redirection, which creates the file even
# when the conversion fails, so the file must be a non-empty regular file
# before it is imported.
# NOTE(review): COPY ... FROM '<path>' is opened by the PostgreSQL server, not
# by psql, so the path must be readable on the database host - confirm this
# matches the deployment (psql's \copy reads the file on the client side).
import_participants() {
  local program=$1 csv=$2
  if [[ -f "$csv" && -s "$csv" ]]; then
    echo " - Importing $program participants: $csv"
    psql cordis -c "COPY participants($participant_columns) FROM '$csv' CSV HEADER;"
    # Rows loaded just now are the only ones without a programme yet.
    psql cordis -c "UPDATE participants SET fundingprogram='$program' WHERE fundingprogram IS NULL"
  else
    # The failure message uses the lower-case programme name.
    echo " - Invalid file $(printf '%s' "$program" | tr '[:upper:]' '[:lower:]'): $csv"
  fi
}

import_participants FP7 "$csvfp7"
import_participants H2020 "$csvh2020"
#--------------------------------
echo " - Generating json files"

# Run one SQL script against the cordis database and store its output.
#   $1 - SQL script to execute
#   $2 - file that receives the output
# sed collapses every doubled backslash in the psql output to a single one.
export_json() {
  psql cordis -f "$1" | sed 's/\\\\/\\/g' > "$2"
}

# Remove the previous export before writing the new files.
rm -f ../../jsonfiles/cordis/*.json

export_json projects2json.sql ../../jsonfiles/cordis/project.json
export_json orgs2json.sql ../../jsonfiles/cordis/organization.json
export_json projOrg2json.sql ../../jsonfiles/cordis/projectOrganization.json
export_json projOtherId2json.sql ../../jsonfiles/cordis/projectOtherId.json
export_json orgOtherId2json.sql ../../jsonfiles/cordis/organizationOtherId.json

echo "Done."
echo