added opendoar dump; refactored dump scripts
parent 85d2dc6cb7
commit a055b4247e
@@ -87,3 +87,6 @@ target/
 
 # Mypy cache
 .mypy_cache/
+
+# Configurations
+config.py
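Note: the newly ignored config.py is not part of the commit. Judging from the `import config` statements and the attributes referenced in the dump scripts below, it is expected to hold the FAIRsharing credentials and the OpenDOAR/Sherpa API key. A minimal sketch, where only the attribute names are taken from the diffs and all values are placeholders:

    # config.py -- kept out of version control (see the .gitignore hunk above).
    # Attribute names come from the dump scripts; values are placeholders.
    fairsharing_username = 'user@example.org'
    fairsharing_password = 'changeme'
    opendoar_apikey = 'your-sherpa-api-key'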
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,9 +1,13 @@
+import config
 import requests
 import json
+import time
+
+DATE = time.strftime('%Y-%m-%d')
 
 url = "https://api.fairsharing.org/users/sign_in"
 
-payload="{\"user\": {\"login\":\"andrea.mannocci@isti.cnr.it\",\"password\":\"\"} }"
+payload="{\"user\": {\"login\":\"%s\",\"password\":\"%s\"} }" % (config.fairsharing_username, config.fairsharing_password)
 headers = {
   'Accept': 'application/json',
   'Content-Type': 'application/json'
@@ -22,13 +26,19 @@ headers = {
   'Content-Type': 'application/json',
   'Authorization': 'Bearer ' + token,
 }
-print(headers)
+# print(headers)
 
-with open('./fairsharing.json', 'w') as file_out:
-    for page in range(1, 5):
-        url = 'https://api.fairsharing.org/databases/?page[number]=%s&page[size]=500' % page
+with open('../../data/raw/fairsharing_dump_%s.json' % DATE, 'w') as file_out:
+    page = 1
+    size = 500
+    while(True):
+        url = 'https://api.fairsharing.org/databases/?page[number]=%s&page[size]=%s' % (page,size)
         print(url)
 
         response = requests.request("GET", url, headers=headers)
         file_out.writelines('\n'.join([json.dumps(record) for record in response.json()['data']]))
         file_out.write('\n')
+
+        if len(response.json()['data']) < size:
+            break
+        page += 1
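The refactored FAIRsharing loop above and the new OpenDOAR script below share the same pagination pattern: request fixed-size pages and stop as soon as a page comes back with fewer records than requested. A sketch of that pattern as a reusable generator, shown for the page-number variant used here (a hypothetical helper, not part of this commit):

    # Hypothetical helper illustrating the paginate-until-short-page pattern.
    import requests

    def fetch_all(url_template, size, headers=None, key='data'):
        """Yield records page by page until a page is shorter than `size`."""
        page = 1
        while True:
            response = requests.get(url_template % (page, size), headers=headers)
            records = response.json()[key]
            yield from records
            if len(records) < size:
                break
            page += 1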
@@ -0,0 +1,21 @@
+import config
+import requests
+import json
+import time
+
+DATE = time.strftime('%Y-%m-%d')
+
+with open('../../data/raw/opendoar_dump_%s.json' % DATE, 'w') as file_out:
+    size = 100
+    offset = 0
+    while(True):
+        url = 'https://v2.sherpa.ac.uk/cgi/retrieve/cgi/retrieve?item-type=repository&api-key=%s&format=Json&limit=%s&offset=%s' % (config.opendoar_apikey, size, offset)
+        print(url)
+
+        response = requests.request("GET", url)
+        file_out.writelines('\n'.join([json.dumps(record) for record in response.json()['items']]))
+        file_out.write('\n')
+
+        if len(response.json()['items']) < size:
+            break
+        offset += size
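Both dump scripts write newline-delimited JSON: one json.dumps(record) per line under ../../data/raw/, with the run date in the file name. A minimal sketch of reading such a dump back (the concrete date in the path is only an example):

    # Read a dump back; each non-empty line is one JSON record.
    import json

    records = []
    with open('../../data/raw/opendoar_dump_2021-06-01.json') as file_in:
        for line in file_in:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    print('loaded %s records' % len(records))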