#!/usr/bin/perl

# NOTE: this script MUST NOT BE USED at present; it is kept only as a fallback
# in case grid.ac gets deprecated.
#
# Reads a ROR JSON dump and writes one tab-separated file per target table
# (organizations, other_ids, other_names, acronyms, relationships, urls)
# into the output directory.

use File::Path 'make_path';
use Digest::MD5 qw(md5_hex);
use JSON::Parse 'json_file_to_perl';
use Data::Dumper;
use strict;
use warnings;
use utf8;

binmode(STDOUT, ":utf8");

# THE LATEST VERSION OF ror.json IS AVAILABLE AT https://figshare.com/collections/ROR_Data/4596503

my $inputFile = '../../../../data/ror.json';
my $outputDir = '../../../../data/ror_tables';

# make_path returns an empty list when nothing had to be created, so only
# call (and check) it when the directory is really missing.
unless (-d $outputDir) {
    make_path($outputDir) or die "Failed to create path: $outputDir";
}

my $data = json_file_to_perl($inputFile);
die "Unexpected content in $inputFile: expected a top-level JSON array\n"
    unless ref($data) eq 'ARRAY';

my $OUT_ORGS        = open_output("$outputDir/organizations.tsv");
my $OUT_OTHER_IDS   = open_output("$outputDir/other_ids.tsv");
my $OUT_OTHER_NAMES = open_output("$outputDir/other_names.tsv");
my $OUT_ACRONYMS    = open_output("$outputDir/acronyms.tsv");
my $OUT_RELS        = open_output("$outputDir/relationships.tsv");
my $OUT_URLS        = open_output("$outputDir/urls.tsv");

foreach my $record (@$data) {
    # Temporary internal identifier derived from the ROR id.
    my $id = 'tmp::' . md5_hex($record->{'id'});

    write_orgs($id, $record, $OUT_ORGS);
    write_other_ids($id, $record, $OUT_OTHER_IDS);
    write_other_names($id, $record, $OUT_OTHER_NAMES);
    write_acronyms($id, $record, $OUT_ACRONYMS);
    # write_rels($id, $record, $OUT_RELS);
    write_urls($id, $record, $OUT_URLS);
}

# Buffered write errors only surface at close time, so check every handle.
foreach my $fh ($OUT_ORGS, $OUT_OTHER_IDS, $OUT_OTHER_NAMES,
                $OUT_ACRONYMS, $OUT_RELS, $OUT_URLS) {
    close($fh) or die("Can't close an output file: $!");
}

print "\nDone.\n\n";

# Opens $path for writing with a UTF-8 output layer and returns the handle.
# Dies with the path and the system error when the file cannot be opened.
sub open_output {
    my ($path) = @_;

    open(my $fh, '>:utf8', $path)
        or die("Can't open an output file '$path': $!");

    return $fh;
}

# Prints @fields to $OUT as one tab-separated line terminated by "\n".
# Undefined fields are written as empty strings.
sub write_row {
    my ($OUT, @fields) = @_;

    print {$OUT} join("\t", map { defined($_) ? $_ : '' } @fields), "\n";

    return;
}

# Writes the organization row for $record:
# id, name, first type (or 'UNKNOWN'), lat, lng, city, country code,
# created_by, modified_by.
sub write_orgs {
    my ($id, $record, $OUT) = @_;

    # Guard the nested lookup: a non-hash 'country' would die under strict refs.
    my $country      = $record->{'country'};
    my $country_code = ref($country) eq 'HASH' ? $country->{'country_code'} : undef;

    write_row(
        $OUT,
        $id,
        $record->{'name'},
        getFirstArrayElem($record->{'types'}, 'UNKNOWN'),
        0,               # lat - TODO MISSING
        0,               # lng - TODO MISSING
        "",              # city - TODO MISSING
        $country_code,
        "import:ror",    # created_by
        "import:ror",    # modified_by
    );

    return;
}

# Writes one row per identifier of $record: the ROR id itself (type 'ror')
# followed by every entry found under external_ids -> <type> -> all, where
# 'all' may be either a single value or an array of values.
sub write_other_ids {
    my ($id, $record, $OUT) = @_;

    _write_other_ids($id, $record->{'id'}, 'ror', $OUT);

    my $external_ids = $record->{'external_ids'};
    return unless ref($external_ids) eq 'HASH';

    # Sorted keys keep the output order stable between runs.
    foreach my $type (sort keys %$external_ids) {
        my $entry = $external_ids->{$type};
        next unless ref($entry) eq 'HASH';

        my $all = $entry->{'all'};
        if (ref($all) eq 'ARRAY') {
            foreach my $other (@$all) {
                _write_other_ids($id, $other, $type, $OUT);
            }
        }
        else {
            _write_other_ids($id, $all, $type, $OUT);
        }
    }

    return;
}

# Writes a single (id, other id, type) row; undefined or empty identifiers
# are skipped.
sub _write_other_ids {
    my ($id, $other, $type, $OUT) = @_;

    # defined/length instead of plain truth, so the value "0" is not dropped.
    return unless defined($other) && length($other);

    write_row($OUT, $id, $other, $type);

    return;
}

# Writes one row per known name of $record: the main name (language 'en'),
# every alias (language 'UNKNOWN') and every label with its iso639 code.
sub write_other_names {
    my ($id, $record, $OUT) = @_;

    _write_other_names($id, $record->{'name'}, 'en', $OUT);

    foreach my $alias (@{ $record->{'aliases'} }) {
        _write_other_names($id, $alias, 'UNKNOWN', $OUT);
    }

    foreach my $label (@{ $record->{'labels'} }) {
        _write_other_names($id, $label->{label}, $label->{'iso639'}, $OUT);
    }

    return;
}

# Writes a single (id, name, language) row; undefined or empty names are
# skipped.
sub _write_other_names {
    my ($id, $name, $lang, $OUT) = @_;

    # defined/length instead of plain truth, so the value "0" is not dropped.
    return unless defined($name) && length($name);

    write_row($OUT, $id, $name, $lang);

    return;
}

# Writes one (id, acronym) row per entry in the record's 'acronyms' list.
sub write_acronyms {
    my ($id, $record, $OUT) = @_;

    foreach my $acr (@{ $record->{'acronyms'} }) {
        write_row($OUT, $id, $acr);
    }

    return;
}

# Placeholder for the relationships table: currently writes nothing.
sub write_rels {
    my ($id, $record, $OUT) = @_;

    # print $OUT $id;
    # print $OUT "\t";
    # print $OUT ""; # reltype - TODO
    # print $OUT "\t";
    # print $OUT ""; # id2 - TODO
    # Example: 'tmp::'||md5(o.grid_id)
    # print $OUT "\n";

    return;
}

# Writes one (id, url) row per entry in the record's 'links' list.
sub write_urls {
    my ($id, $record, $OUT) = @_;

    foreach my $url (@{ $record->{'links'} }) {
        write_row($OUT, $id, $url);
    }

    return;
}

# Returns the first element of the array referenced by $arr, or $default
# when $arr is not an array reference or the array is empty.
sub getFirstArrayElem {
    my ($arr, $default) = @_;

    # Dereferencing undef in rvalue context dies under strict refs, so check
    # the reference type before looking at the contents.
    return $default unless ref($arr) eq 'ARRAY' && @$arr;

    return $arr->[0];
}

1;