data4impact/apps/data4impact-import-scripts/scripts/companyData/companydata_innovation_text...

41 lines
1.2 KiB
Bash
Executable File

#!/bin/bash
#
# Imports the CompanyData innovation texts:
#   1. converts the source JSON to CSV with jq,
#   2. recreates the "companydata_texts" PostgreSQL database and loads the CSV,
#   3. exports the result as JSON via innovationTexts2json.sql.
#
# All relative paths (input file, *.sql files, output directory) are resolved
# against the current working directory.

# Source JSON file with the innovation texts.
detailsFile=../../orig/CompanyData/D4I_company_innovation_texts.json
# Scratch directory for the intermediate CSV; wiped on every run.
workdir=/tmp/companydata_texts

# Start from an empty scratch directory. Abort if it cannot be (re)created:
# every later step writes to or reads from it. The ":?" guard stops rm -rf from
# ever running against an empty path.
if ! { rm -rf -- "${workdir:?}" && mkdir -- "$workdir"; }; then
  echo "ERROR: could not (re)create work directory: $workdir" >&2
  exit 1
fi
echo
echo "CompanyData Innovation texts Import:"
#--------------------------------
# Flatten the JSON input into a single CSV file.
#
# jq filter, step by step:
#   --slurp                    read every JSON value in the file into one array
#   map(keys) | add | unique   union of all object keys, sorted -> $cols
#   map(... $row[.] ...)       one array per object, values in $cols order
#                              (a key missing from an object yields null)
#   $cols, $rows[] | @csv      header line first, then one CSV line per object
#
# Because $cols is sorted, the CSV columns come out in alphabetical order; the
# COPY column list used for the import later in this script is written in that
# same order.
echo " - Generating csv files"
csvDetails="$workdir/details.csv"
if ! jq --slurp -r '(map(keys) | add | unique) as $cols | map(. as $row | $cols | map($row[.])) as $rows | $cols, $rows[] | @csv' "$detailsFile" > "$csvDetails"; then
  # The redirection creates the CSV even when jq fails (missing or malformed
  # input), so remove the partial file and stop before the database is touched.
  echo " - ERROR: could not convert $detailsFile to CSV" >&2
  rm -f -- "$csvDetails"
  exit 1
fi
#--------------------------------
# Recreate the companydata_texts database from schema_texts.sql and load the
# generated CSV into its "data" table.

# Validate the CSV *before* dropping the existing database, so a failed
# conversion does not destroy the previous import. "-s" (exists and non-empty)
# is used because an empty file can be left behind by a failed conversion; a
# valid CSV always contains at least the header line.
if [[ ! -s "$csvDetails" ]]; then
  echo " - Invalid file: $csvDetails" >&2
  exit 1
fi

echo " - Recreating the companydata_texts database"
if ! dropdb --if-exists companydata_texts; then
  echo " - ERROR: could not drop database companydata_texts" >&2
  exit 1
fi
if ! createdb companydata_texts; then
  echo " - ERROR: could not create database companydata_texts" >&2
  exit 1
fi
# ON_ERROR_STOP makes psql return a non-zero status when a statement in the
# schema file fails; without it psql -f carries on and exits 0.
if ! psql -v ON_ERROR_STOP=1 -f schema_texts.sql companydata_texts; then
  echo " - ERROR: could not apply schema_texts.sql" >&2
  exit 1
fi

echo " - Importing details: $csvDetails"
# COPY ... FROM '<path>' is executed by the database server, so the CSV path
# must be readable by the server process.
if ! psql -v ON_ERROR_STOP=1 -c "COPY data(company_id,prediction_revised,site_url,source,text_clean_gentle,text_clean_strong,text_is_duplicated) FROM '$csvDetails' CSV HEADER;" companydata_texts; then
  echo " - ERROR: import of $csvDetails failed" >&2
  exit 1
fi
#--------------------------------
# Export the imported texts as JSON.
echo " - Generating json files"
jsonDir=../../jsonfiles/companydata_texts
jsonFile="$jsonDir/orgCompanyInnovationTexts.json"

# Make sure the output directory exists; otherwise the redirection below fails.
if ! mkdir -p -- "$jsonDir"; then
  echo " - ERROR: could not create output directory: $jsonDir" >&2
  exit 1
fi
# Remove the JSON files of a previous run (no error if there are none).
rm -f -- "$jsonDir"/*.json

# The sed step collapses every doubled backslash in psql's output into a
# single one.
psql companydata_texts -f innovationTexts2json.sql | sed 's/\\\\/\\/g' > "$jsonFile"
# Capture the status of both pipeline stages immediately: PIPESTATUS is
# overwritten by the next command, and the pipeline's own status only
# reflects sed.
exportStatus=("${PIPESTATUS[@]}")
if (( exportStatus[0] != 0 || exportStatus[1] != 0 )); then
  echo " - ERROR: JSON export failed (psql: ${exportStatus[0]}, sed: ${exportStatus[1]})" >&2
  exit 1
fi
echo "Done."
echo