Skip to content
88 changes: 68 additions & 20 deletions dspace_rest_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWO
self.auth_request_headers = {'User-Agent': self.USER_AGENT}
self.request_headers = {'Content-type': 'application/json', 'User-Agent': self.USER_AGENT}
self.list_request_headers = {'Content-type': 'text/uri-list', 'User-Agent': self.USER_AGENT}
self.text_plain_request_headers = {'Content-type': 'text/plain', 'User-Agent': self.USER_AGENT}

def authenticate(self, retry=False):
"""
Expand Down Expand Up @@ -167,19 +168,50 @@ def refresh_token(self):
r = self.api_post(self.LOGIN_URL, None, None)
self.update_token(r)

def api_get(self, url, params=None, data=None, headers=None):
def api_get(self, url, params=None, data=None, headers=None, retry=False):
"""
Perform a GET request. Refresh XSRF token if necessary.
@param url: DSpace REST API URL
@param params: any parameters to include (eg ?page=0)
@param data: any data to supply (typically not relevant for GET)
@param headers: any override headers (eg. with short-lived token for download)
@param retry: Has this method already been retried? Used if we need to refresh XSRF.
@return: Response from API
"""
if headers is None:
headers = self.request_headers
r = self.session.get(url, params=params, data=data, headers=headers)
self.update_token(r)

if r.status_code == 403:
# 403 Forbidden
# If we had a CSRF failure, retry the request with the updated token
# After speaking in #dev it seems that these do need occasional refreshes but I suspect
# it's happening too often for me, so check for accidentally triggering it
r_json = parse_json(r)
if 'message' in r_json and 'CSRF token' in r_json['message']:
if retry:
_logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}')
else:
_logger.debug("Retrying request with updated CSRF token")
return self.api_get(url, params=params, data=data, headers=headers, retry=True)

# we need to log in again, if there is login error. This is a bad
# solution copied from the past
elif r.status_code == 401:
r_json = parse_json(r)
if r_json and 'message' in r_json and 'Authentication is required' in r_json['message']:
if retry:
_logger.error(
'API Get: Already retried... something must be wrong')
else:
_logger.debug("API Get: Retrying request with updated CSRF token")
# try to authenticate
self.authenticate()
# Try to authenticate and repeat the request 3 times -
# if it won't happen log error
return self.api_get(url, params=params, data=data, headers=headers, retry=True)

return r

def api_post(self, url, params, json, retry=False):
Expand Down Expand Up @@ -212,12 +244,12 @@ def api_post(self, url, params, json, retry=False):
# solution copied from the past
elif r.status_code == 401:
r_json = parse_json(r)
if 'message' in r_json and 'Authentication is required' in r_json['message']:
if r_json and 'message' in r_json and 'Authentication is required' in r_json['message']:
if retry:
logging.error(
_logger.error(
'API Post: Already retried... something must be wrong')
else:
logging.debug("API Post: Retrying request with updated CSRF token")
_logger.debug("API Post: Retrying request with updated CSRF token")
# try to authenticate
self.authenticate()
# Try to authenticate and repeat the request 3 times -
Expand All @@ -244,13 +276,29 @@ def api_post_uri(self, url, params, uri_list, retry=False):
# After speaking in #dev it seems that these do need occasional refreshes but I suspect
# it's happening too often for me, so check for accidentally triggering it
r_json = r.json()
if 'message' in r_json and 'CSRF token' in r_json['message']:
if r_json and 'message' in r_json and 'CSRF token' in r_json['message']:
if retry:
_logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}')
else:
_logger.debug("Retrying request with updated CSRF token")
return self.api_post_uri(url, params=params, uri_list=uri_list, retry=True)

# we need to log in again, if there is login error. This is a bad
# solution copied from the past
elif r.status_code == 401:
r_json = parse_json(r)
if 'message' in r_json and 'Authentication is required' in r_json['message']:
if retry:
_logger.error(
'API Post: Already retried... something must be wrong')
else:
_logger.debug("API Post: Retrying request with updated CSRF token")
# try to authenticate
self.authenticate()
# Try to authenticate and repeat the request 3 times -
# if it won't happen log error
return self.api_post_uri(url, params=params, uri_list=uri_list, retry=True)

return r

def api_put(self, url, params, json, retry=False):
Expand All @@ -274,7 +322,7 @@ def api_put(self, url, params, json, retry=False):
_logger.debug(r.text)
# Parse response
r_json = parse_json(r)
if 'message' in r_json and 'CSRF token' in r_json['message']:
if r_json and 'message' in r_json and 'CSRF token' in r_json['message']:
if retry:
_logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}')
else:
Expand All @@ -300,14 +348,14 @@ def api_put_uri(self, url, params, uri_list, retry=False):
# If we had a CSRF failure, retry the request with the updated token
# After speaking in #dev it seems that these do need occasional refreshes but I suspect
# it's happening too often for me, so check for accidentally triggering it
logging.debug(r.text)
_logger.debug(r.text)
# Parse response
r_json = parse_json(r)
if 'message' in r_json and 'CSRF token' in r_json['message']:
if r_json and 'message' in r_json and 'CSRF token' in r_json['message']:
if retry:
logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}')
_logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}')
else:
logging.debug("Retrying request with updated CSRF token")
_logger.debug("Retrying request with updated CSRF token")
return self.api_put_uri(url, params=params, uri_list=uri_list, retry=True)

return r
Expand All @@ -332,7 +380,7 @@ def api_delete(self, url, params, retry=False):
_logger.debug(r.text)
# Parse response
r_json = parse_json(r)
if 'message' in r_json and 'CSRF token' in r_json['message']:
if r_json and 'message' in r_json and 'CSRF token' in r_json['message']:
if retry:
_logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}')
else:
Expand All @@ -352,15 +400,15 @@ def api_patch(self, url, operation, path, value, retry=False):
@see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md
"""
if url is None:
logging.error('Missing required URL argument')
_logger.error('Missing required URL argument')
return None
if path is None:
logging.error('Need valid path eg. /withdrawn or /metadata/dc.title/0/language')
_logger.error('Need valid path eg. /withdrawn or /metadata/dc.title/0/language')
return None
if (operation == self.PatchOperation.ADD or operation == self.PatchOperation.REPLACE
or operation == self.PatchOperation.MOVE) and value is None:
# missing value required for add/replace/move operations
logging.error('Missing required "value" argument for add/replace/move operations')
_logger.error('Missing required "value" argument for add/replace/move operations')
return None

# compile patch data
Expand All @@ -386,7 +434,7 @@ def api_patch(self, url, operation, path, value, retry=False):
# it's happening too often for me, so check for accidentally triggering it
_logger.debug(r.text)
r_json = parse_json(r)
if 'message' in r_json and 'CSRF token' in r_json['message']:
if r_json and 'message' in r_json and 'CSRF token' in r_json['message']:
if retry:
_logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}')
else:
Expand Down Expand Up @@ -509,7 +557,7 @@ def update_dso(self, dso, params=None):
return None
dso_type = type(dso)
if not isinstance(dso, SimpleDSpaceObject):
logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community) '
_logger.error('Only SimpleDSpaceObject types (eg Item, Collection, Community) '
'are supported by generic update_dso PUT.')
return dso
try:
Expand Down Expand Up @@ -556,11 +604,11 @@ def delete_dso(self, dso=None, url=None, params=None):
"""
if dso is None:
if url is None:
logging.error('Need a DSO or a URL to delete')
_logger.error('Need a DSO or a URL to delete')
return None
else:
if not isinstance(dso, SimpleDSpaceObject):
logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) '
_logger.error('Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) '
'are supported by generic update_dso PUT.')
return dso
# Get self URI from HAL links
Expand Down Expand Up @@ -988,7 +1036,7 @@ def remove_metadata(self, dso, field, place):
"""
if dso is None or field is None or place is None or not isinstance(dso, DSpaceObject):
# TODO: separate these tests, and add better error handling
logging.error('Invalid or missing DSpace object, field or value string')
_logger.error('Invalid or missing DSpace object, field or value string')
return self
dso_type = type(dso)

Expand Down Expand Up @@ -1022,7 +1070,7 @@ def create_user(self, user, token=None):

def delete_user(self, user):
if not isinstance(user, User):
logging.error('Must be a valid user')
_logger.error('Must be a valid user')
return None
return self.delete_dso(user)

Expand Down