2020-07-15 15:52:58 +02:00
|
|
|
#!/usr/bin/perl
|
|
|
|
|
2020-07-16 16:11:24 +02:00
|
|
|
# NOTE: this script MUST NOT BE USED for now; it will be needed only if grid.ac is deprecated
|
|
|
|
|
2020-07-15 15:52:58 +02:00
|
|
|
use File::Path 'make_path';
|
|
|
|
use Digest::MD5 qw(md5_hex);
|
|
|
|
use JSON::Parse 'json_file_to_perl';
|
|
|
|
use Data::Dumper;
|
|
|
|
use strict;
|
|
|
|
use utf8;
|
|
|
|
|
|
|
|
binmode(STDOUT, ":utf8");

# THE LATEST VERSION OF ror.json IS AVAILABLE AT https://figshare.com/collections/ROR_Data/4596503
my $inputFile = '../../../../data/ror.json';
my $outputDir = '../../../../data/ror_tables';

# Create the output directory only when it does not exist yet.
unless (-d $outputDir) {
    make_path($outputDir) or die "Failed to create path: $outputDir";
}

# The records are iterated as an array below, so fail early with a clear
# message if the input has a different top-level shape.
my $data = json_file_to_perl($inputFile);
die "Unexpected content in $inputFile: expected a JSON array of records\n"
    unless ref $data eq 'ARRAY';

# One TSV file per table. relationships.tsv is still created (and left empty)
# because write_rels is currently disabled.
my @tables = qw(organizations other_ids other_names acronyms relationships urls);

# Three-argument open keeps the path out of the mode string; the :utf8 layer
# replaces the separate binmode call on each handle.
my %out;
foreach my $table (@tables) {
    my $path = "$outputDir/$table.tsv";
    open(my $fh, '>:utf8', $path) or die "Can't open output file '$path': $!";
    $out{$table} = $fh;
}

foreach my $record (@$data) {
    # Temporary identifier derived from the record id.
    my $id = 'tmp::' . md5_hex($record->{'id'});

    write_orgs($id, $record, $out{'organizations'});
    write_other_ids($id, $record, $out{'other_ids'});
    write_other_names($id, $record, $out{'other_names'});
    write_acronyms($id, $record, $out{'acronyms'});
    # write_rels($id, $record, $out{'relationships'});
    write_urls($id, $record, $out{'urls'});
}

# Buffered write errors only surface at close time, so check every close.
foreach my $table (@tables) {
    close($out{$table}) or die "Failed to close '$outputDir/$table.tsv': $!";
}

print "\nDone.\n\n";
|
|
|
|
|
|
|
|
# Writes one tab-separated organization row for $record to the handle $OUT.
# Columns, in order: id, name, first type (or 'UNKNOWN'), lat, lng, city,
# country code, created_by, modified_by.
sub write_orgs {
    my ($id, $record, $OUT) = @_;

    my $type = getFirstArrayElem($record->{'types'}, 'UNKNOWN');

    my @columns = (
        $id,
        $record->{'name'},
        $type,
        0,                                         # lat - TODO MISSING
        0,                                         # lng - TODO MISSING
        "",                                        # city - TODO MISSING
        $record->{'country'}->{'country_code'},
        "import:ror",                              # created_by
        "import:ror",                              # modified_by
    );

    print {$OUT} join("\t", @columns), "\n";
}
|
|
|
|
|
|
|
|
# Writes the identifier rows of $record to the handle $OUT: first the record's
# own id with type 'ror', then every value found under
# external_ids -> <type> -> all, which may be a single value or an array.
# Types are visited in sorted order so the output is reproducible across runs
# (Perl's hash iteration order is not stable).
sub write_other_ids {
    my ($id, $record, $OUT) = @_;

    _write_other_ids($id, $record->{'id'}, 'ror', $OUT);

    # A record without external ids contributes only its own id.
    my $externalIds = $record->{'external_ids'};
    return unless ref $externalIds eq 'HASH';

    foreach my $type (sort keys %$externalIds) {
        my $entry = $externalIds->{$type};

        # Skip malformed entries instead of dying on a non-hash dereference.
        next unless ref $entry eq 'HASH';

        my $all    = $entry->{'all'};
        my @values = (ref $all eq 'ARRAY') ? @$all : ($all);

        foreach my $other (@values) {
            _write_other_ids($id, $other, $type, $OUT);
        }
    }

    return;
}
|
|
|
|
|
|
|
|
|
|
|
|
# Writes a single "id <TAB> other id <TAB> type" row to the handle $OUT.
# Nothing is written when $other is false (undef, empty string or 0).
sub _write_other_ids {
    my ($id, $other, $type, $OUT) = @_;

    return unless $other;

    print {$OUT} join("\t", $id, $other, $type), "\n";
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Writes the name rows of $record to the handle $OUT: the main name tagged
# 'en', every alias tagged 'UNKNOWN', and every label tagged with its own
# iso639 value.
sub write_other_names {
    my ($id, $record, $OUT) = @_;

    _write_other_names($id, $record->{'name'}, 'en', $OUT);

    _write_other_names($id, $_, 'UNKNOWN', $OUT)
        for @{ $record->{'aliases'} };

    for my $labelEntry (@{ $record->{'labels'} }) {
        my $text = $labelEntry->{label};
        my $lang = $labelEntry->{'iso639'};
        _write_other_names($id, $text, $lang, $OUT);
    }
}
|
|
|
|
|
|
|
|
# Writes a single "id <TAB> name <TAB> language" row to the handle $OUT.
# Nothing is written when $name is false (undef, empty string or 0).
sub _write_other_names {
    my ($id, $name, $lang, $OUT) = @_;

    return unless $name;

    print {$OUT} join("\t", $id, $name, $lang), "\n";
}
|
|
|
|
|
|
|
|
# Writes one "id <TAB> acronym" row to the handle $OUT for each entry of the
# record's 'acronyms' list.
sub write_acronyms {
    my ($id, $record, $OUT) = @_;

    for my $acronym (@{ $record->{'acronyms'} }) {
        print {$OUT} join("\t", $id, $acronym), "\n";
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
# Placeholder for the relationships table: currently writes nothing.
# Intended row layout, still to be implemented:
#   id <TAB> reltype <TAB> id2
#     reltype - TODO
#     id2     - TODO # Example: 'tmp::'||md5(o.grid_id)
sub write_rels {
    my ($id, $record, $OUT) = @_;
}
|
|
|
|
|
|
|
|
# Writes one "id <TAB> url" row to the handle $OUT for each entry of the
# record's 'links' list.
sub write_urls {
    my ($id, $record, $OUT) = @_;

    for my $link (@{ $record->{'links'} }) {
        print {$OUT} join("\t", $id, $link), "\n";
    }
}
|
|
|
|
|
|
|
|
# Returns the first element of the array referenced by $arr, or $default when
# $arr is undefined, is not an array reference, or references an empty array.
sub getFirstArrayElem {
    my ($arr, $default) = @_;

    # Dereferencing undef or a plain scalar is fatal under "use strict", so
    # anything that is not a non-empty array falls back to the default.
    return $default unless ref $arr eq 'ARRAY' && @$arr;

    return $arr->[0];
}
|
|
|
|
|
|
|
|
|
|
|
|
1;
|