forked from D-Net/dnet-hadoop
added script to regenerate hostedBy Map following instruction defined on ticket #7539
updated hosted By Map
This commit is contained in:
parent
90e197a563
commit
3aa2020b24
|
@ -0,0 +1,63 @@
|
|||
from urllib.request import urlopen
|
||||
import json
|
||||
|
||||
|
||||
def retrieve_datacite_clients(base_url):
|
||||
datacite_clients = {}
|
||||
while base_url is not None:
|
||||
with urlopen(base_url) as response:
|
||||
print(f"requesting {base_url}")
|
||||
response_content = response.read()
|
||||
data = json.loads(response_content)
|
||||
if 'data' in data and len(data['data'])>0:
|
||||
for item in data['data']:
|
||||
datacite_clients[item['id'].lower()]= item['attributes']['re3data'].lower().replace("https://doi.org/","")
|
||||
base_url = data['links']['next']
|
||||
else:
|
||||
base_url = None
|
||||
return datacite_clients
|
||||
|
||||
|
||||
def retrieve_r3data(start_url):
|
||||
r3data_clients = {}
|
||||
page_number = 1
|
||||
base_url = start_url
|
||||
while base_url is not None:
|
||||
with urlopen(base_url) as response:
|
||||
print(f"requesting {base_url}")
|
||||
response_content = response.read()
|
||||
data = json.loads(response_content)
|
||||
if 'data' in data and len(data['data'])>0:
|
||||
for item in data['data']:
|
||||
r3data_clients[item['id'].lower()]= dict(
|
||||
openaire_id= "re3data_____::"+item['attributes']['re3dataId'].lower(),
|
||||
official_name=item['attributes']['repositoryName']
|
||||
)
|
||||
page_number +=1
|
||||
base_url = f"{start_url}&page[number]={page_number}"
|
||||
else:
|
||||
base_url = None
|
||||
return r3data_clients
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
base_url ="https://api.datacite.org/clients?query=re3data_id:*&page[size]=250"
|
||||
|
||||
dc = retrieve_datacite_clients(base_url)
|
||||
r3 = retrieve_r3data("https://api.datacite.org/re3data?page[size]=250")
|
||||
|
||||
result = {}
|
||||
|
||||
for item in dc:
|
||||
res = dc[item].lower()
|
||||
if res not in r3:
|
||||
print(f"missing {res} for {item} in dictionary")
|
||||
else:
|
||||
result[item.upper()]= dict(openaire_id=r3[res]["openaire_id"],datacite_name=r3[res]["official_name"], official_name=r3[res]["official_name"] )
|
||||
|
||||
|
||||
with open('hostedBy_map.json', 'w', encoding='utf8') as json_file:
|
||||
json.dump(result, json_file, ensure_ascii=False, indent=1)
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue