added opendoar dump; refactored dump scripts

master
Andrea Mannocci 1 year ago
parent 85d2dc6cb7
commit a055b4247e

.gitignore vendored  (+3 −0)

@@ -87,3 +87,6 @@ target/
 # Mypy cache
 .mypy_cache/
+
+# Configurations
+config.py
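
The dump scripts below read their credentials from a local config.py, which this hunk adds to .gitignore so secrets stay out of the repository. A minimal sketch of what that file might contain, assuming only the three attributes the scripts actually reference (the values are placeholders, not real credentials):

# config.py -- local, untracked configuration (placeholder values)
fairsharing_username = 'user@example.org'   # FAIRsharing account login
fairsharing_password = 'changeme'           # FAIRsharing account password
opendoar_apikey = 'your-sherpa-api-key'     # Sherpa/OpenDOAR v2 API key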

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -1,9 +1,13 @@
+import config
 import requests
 import json
 import time
 
+DATE = time.strftime('%Y-%m-%d')
+
 url = "https://api.fairsharing.org/users/sign_in"
-payload="{\"user\": {\"login\":\"andrea.mannocci@isti.cnr.it\",\"password\":\"\"} }"
+payload="{\"user\": {\"login\":\"%s\",\"password\":\"%s\"} }" % (config.fairsharing_username, config.fairsharing_password)
 headers = {
     'Accept': 'application/json',
     'Content-Type': 'application/json'
@@ -22,13 +26,19 @@ headers = {
     'Content-Type': 'application/json',
     'Authorization': 'Bearer ' + token,
 }
-print(headers)
+# print(headers)
 
-with open('./fairsharing.json', 'w') as file_out:
-    for page in range(1, 5):
-        url = 'https://api.fairsharing.org/databases/?page[number]=%s&page[size]=500' % page
+with open('../../data/raw/fairsharing_dump_%s.json' % DATE, 'w') as file_out:
+    page = 1
+    size = 500
+    while(True):
+        url = 'https://api.fairsharing.org/databases/?page[number]=%s&page[size]=%s' % (page, size)
         print(url)
         response = requests.request("GET", url, headers=headers)
         file_out.writelines('\n'.join([json.dumps(record) for record in response.json()['data']]))
         file_out.write('\n')
+        if len(response.json()['data']) < size:
+            break
+        page += 1
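
The refactor replaces the fixed four-page loop with a while loop that pages until the API returns fewer than size records, so the dump no longer truncates if FAIRsharing grows past 2,000 databases; the output also moves to a date-stamped file under data/raw/. The sign-in step that produces token lies between the two hunks and is untouched by this commit; a minimal sketch of how it plausibly works, assuming the FAIRsharing sign-in response carries the token under a 'jwt' key (an assumption, not visible in this diff):

# Exchange the credentials payload for a bearer token.
# The 'jwt' response key is an assumption; verify against the actual sign-in response.
response = requests.request("POST", url, headers=headers, data=payload)
token = response.json()['jwt']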

@@ -0,0 +1,21 @@
+import config
+import requests
+import json
+import time
+
+DATE = time.strftime('%Y-%m-%d')
+
+with open('../../data/raw/opendoar_dump_%s.json' % DATE, 'w') as file_out:
+    size = 100
+    offset = 0
+    while(True):
+        url = 'https://v2.sherpa.ac.uk/cgi/retrieve?item-type=repository&api-key=%s&format=Json&limit=%s&offset=%s' % (config.opendoar_apikey, size, offset)
+        print(url)
+        response = requests.request("GET", url)
+        file_out.writelines('\n'.join([json.dumps(record) for record in response.json()['items']]))
+        file_out.write('\n')
+        if len(response.json()['items']) < size:
+            break
+        offset += size
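
Both scripts emit JSON Lines rather than one JSON array: a record per line, appended page by page, which keeps memory use flat and makes the dumps easy to stream or grep. A minimal sketch of reading a dump back, assuming a run date of 2023-01-01 (the filename suffix is whatever DATE was on the day of the run):

import json

# Load one record per line from a dump produced by the scripts above.
# The date suffix in the filename is a placeholder for the actual run date.
with open('../../data/raw/opendoar_dump_2023-01-01.json') as file_in:
    records = [json.loads(line) for line in file_in]

print('%s records loaded' % len(records))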