44 lines
1.6 KiB
Bash
Executable File
44 lines
1.6 KiB
Bash
Executable File
#!/bin/bash
#
# Rebuilds the patents_excel PostgreSQL database from the FP7 patents Excel
# workbook and refreshes the dependent materialized views.
#
# Steps:
#   1. Convert the Excel workbook to CSV with xlsx2csv.
#   2. Drop and recreate the patents_excel database from schema.sql.
#   3. Load the CSV into the "data" table with a server-side COPY.
#   4. Refresh the document, doc_other_identifier and doc_project
#      materialized views in the "patents" database.
#   5. Remove previously generated JSON files (the JSON export itself is
#      currently commented out below).
#
# Relative paths (the workbook, schema.sql, ../../jsonfiles) are resolved
# against the current working directory.
# Requires on PATH: xlsx2csv, dropdb, createdb, psql.

# Treat use of an unset variable as an error instead of expanding to "".
set -u

excelFile="../../orig/patents/FP7_patents_full_list_Except_for_ICT.xlsx"

# Fixed location on purpose: COPY ... FROM '<file>' is read by the PostgreSQL
# server process, so the directory must be readable by that process. A private
# `mktemp -d` directory (mode 0700) would not be.
workdir=/tmp/patentsExcel

# ${workdir:?} aborts rather than running `rm -rf` on an empty path.
rm -rf -- "${workdir:?}" && mkdir -- "$workdir"

echo
echo "Patents Import:"

#--------------------------------
echo " - Generating csv file"
csv="$workdir/patents.csv"

# The redirection creates $csv even when xlsx2csv fails or is not installed,
# so a plain existence test cannot detect a failed conversion. Record the
# exit status and additionally require a non-empty result.
csv_ok=1
xlsx2csv -c UTF-8 "$excelFile" > "$csv" || csv_ok=0
[[ -s "$csv" ]] || csv_ok=0

#--------------------------------
echo " - Recreating the patents_excel database"
# Options are placed before the database name so that parsing does not depend
# on GNU-style argument permutation.
dropdb --if-exists patents_excel
createdb patents_excel
psql patents_excel -f schema.sql

if (( csv_ok )); then
  echo " - Importing data: $csv"
  psql patents_excel -c "COPY data(pat_id,type_ip,appnum,appnt,title,pat_url,pat_ref,pat_auth,pat_num,pat_kind,note,appln_id,appln_title_patstat,priority_year,var15,projectid) FROM '$csv' CSV HEADER;"
else
  echo " - Invalid file: $csv" >&2
fi

# NOTE(review): the views are refreshed in the "patents" database, not in
# patents_excel -- presumably they read the freshly imported table; confirm.
for view in document doc_other_identifier doc_project; do
  psql patents -c "REFRESH MATERIALIZED VIEW $view"
done

#--------------------------------
echo " - Generating json files"
rm -f -- ../../jsonfiles/patents_excel/*.json

# JSON export is currently disabled; the commands are kept for reference.
#psql patents -c "COPY (SELECT row_to_json(t) FROM (SELECT * FROM document ) t) TO STDOUT" | sed 's/\\\\/\\/g' > ../../jsonfiles/patents_excel/document.json
#psql patents -c "COPY (SELECT row_to_json(t) FROM (SELECT * FROM doc_other_identifier) t) TO STDOUT" | sed 's/\\\\/\\/g' > ../../jsonfiles/patents_excel/doc_other_identifier.json
#psql patents -c "COPY (SELECT row_to_json(t) FROM (SELECT * FROM doc_project ) t) TO STDOUT" | sed 's/\\\\/\\/g' > ../../jsonfiles/patents_excel/doc_project.json

echo "Done."
echo