import json
import requests
from tqdm import tqdm
import os
def get_file_from_modac(fname, origin):
    """Download a file from the "Model and Data Clearinghouse" (MoDaC).

    Users should already have a MoDaC account to download the data.
    Accounts can be created on modac.cancer.gov

    Parameters
    ----------
    fname : string
        path on disk to save the file
    origin : string
        original MoDaC URL of the file

    Returns
    ----------
    string
        Path to the downloaded file

    Raises
    ----------
    Exception
        If the download request does not return HTTP 200, or if the number
        of bytes received differs from the size reported for the file.
    """
    print('Downloading data from modac.cancer.gov, make sure you have an account first.')

    # May be None when the file metadata carries no size entry.
    total_size_in_bytes = get_dataObject_modac_filesize(origin)

    _modac_user, modac_token = authenticate_modac()
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer {0}".format(modac_token),
    }

    post_url = origin + '/download'
    print("Downloading: " + post_url + " ...")
    response = requests.post(post_url, data=json.dumps({}), headers=headers, stream=True)
    try:
        if response.status_code != 200:
            print("Error downloading from modac.cancer.gov")
            raise Exception("Response code: {0}, Response message: {1}".format(response.status_code, response.text))

        block_size = 1024  # 1 Kibibyte
        # The context managers close the progress bar and the output file
        # even when the transfer is interrupted half-way.
        with tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) as progress_bar, \
                open(fname, 'wb') as file:
            for chunk in response.iter_content(block_size):
                progress_bar.update(len(chunk))
                file.write(chunk)
        bytes_received = progress_bar.n
    finally:
        # A streamed response keeps its connection open until it is closed.
        response.close()

    # Only verify the size when one is known: an unknown (None) or zero size
    # must not turn a successful download into an error.
    if total_size_in_bytes and bytes_received != total_size_in_bytes:
        raise Exception(
            "ERROR, something went wrong while downloading {0}: expected {1} bytes, received {2}".format(
                post_url, total_size_in_bytes, bytes_received))

    print('Saved file to: ' + fname)
    return fname
def register_file_to_modac(file_path, metadata, destination_path):
    """Register a file in the "Model and Data Clearinghouse" (MoDaC).

    The file size is limited to 2GBs

    Parameters
    ----------
    file_path : string
        path on disk for the file to be uploaded
    metadata : dictionary
        dictionary of attribute/value pairs of metadata to associate with
        the file in MoDaC
    destination_path : string
        The path on MoDaC in form of collection/filename

    Returns
    ----------
    integer
        The returned code from the PUT request

    Raises
    ----------
    Exception
        If the PUT request does not return HTTP 200.
    """
    print('Registering the file {0} at MoDaC location:{1}'.format(file_path, destination_path))

    register_url = "https://modac.cancer.gov/api/v2/dataObject/" + destination_path

    metadata_entries = [
        {"attribute": attribute, "value": value}
        for attribute, value in metadata.items()
    ]
    metadata_dict = {"metadataEntries": metadata_entries}

    # Based on: https://www.tutorialspoint.com/requests/requests_file_upload.htm
    # The file is opened in a `with` block so the handle is released once the
    # request has completed (or failed) instead of leaking.
    with open(file_path, 'rb') as data_file:
        files = {
            'dataObjectRegistration': ('attributes', json.dumps(metadata_dict), "application/json"),
            "dataObject": (file_path, data_file),
        }

        _modac_user, modac_token = authenticate_modac()
        headers = {"Authorization": "Bearer {0}".format(modac_token)}

        response = requests.put(register_url, headers=headers, files=files)

    if response.status_code != 200:
        print(response.headers)
        print(response.text)
        print("Error registering file to modac.cancer.gov")
        raise Exception("Response code: {0}, Response message: {1}".format(response.status_code, response.text))

    print(response.text, response.status_code)
    return response.status_code
def authenticate_modac(generate_token=False):
    """Authenticate a user on modac.cancer.gov.

    A previously saved token is read from ``~/.nci-modac/credentials.json``
    when it exists; otherwise the user is prompted for a username and
    password and a new token is requested from the server.

    Parameters
    ----------
    generate_token : Bool
        Either generate a new token, or read saved token if it exists

    Returns
    ----------
    tuple(string,string)
        tuple with the modac credentials (user name, token)

    Raises
    ----------
    Exception
        If the authentication request does not return HTTP 200.
    """
    from os.path import expanduser

    home = expanduser("~")
    modac_token_dir = os.path.abspath(os.path.join(home, ".nci-modac"))
    modac_token_path = os.path.join(modac_token_dir, "credentials.json")
    user_attr = "modac_user"
    token_attr = "modac_token"

    # Reuse the saved credentials unless a fresh token was requested.
    if not generate_token and os.path.exists(modac_token_path):
        with open(modac_token_path) as f:
            credentials_dic = json.load(f)
        return (credentials_dic[user_attr], credentials_dic[token_attr])

    # Get credentials
    modac_user = input("MoDaC Username: ")
    import getpass
    modac_pass = getpass.getpass("MoDaC Password: ")

    # Generate token
    auth_url = 'https://modac.cancer.gov/api/authenticate'
    print("Authenticating " + modac_user + " ...")
    response = requests.get(auth_url, auth=(modac_user, modac_pass), stream=True)
    if response.status_code != 200:
        print("Error authenticating modac user:{0}".format(modac_user))
        raise Exception("Response code: {0}, Response message: {1}".format(response.status_code, response.text))

    token = response.text
    # Built before the save decision so the fresh credentials are returned
    # even when the user chooses not to store them on disk.
    credentials_dic = {user_attr: modac_user, token_attr: token}

    if os.path.exists(modac_token_path):
        # An existing credentials file is refreshed without asking again.
        save_token = True
    else:
        save_token = query_yes_no("Save MoDaC token in {0}".format(modac_token_path))

    if save_token:
        os.makedirs(modac_token_dir, exist_ok=True)
        with open(modac_token_path, "w") as outfile:
            json.dump(credentials_dic, outfile, indent=4)

    return (credentials_dic[user_attr], credentials_dic[token_attr])
def query_yes_no(question, default="yes"):
    """Ask a yes/no question on the console and return the answer.

    The question is printed followed by a hint showing the accepted
    replies, then one line is read with input(). The prompt repeats until
    a recognised reply is given.

    Parameters
    ----------
    question : string
        string that is presented to the user.
    default : string or None
        "yes" or "no": the answer assumed if the user just hits <Enter>.
        None means an explicit answer is required.

    Returns
    ----------
    boolean
        True for "yes" or False for "no".

    Raises
    ----------
    ValueError
        If default is not "yes", "no" or None.
    """
    answers = {"yes": True, "y": True, "ye": True,
               "no": False, "n": False}

    # Pick the hint shown after the question; the capital letter marks the
    # answer used when the reply is empty.
    if default is None:
        suffix = " [y/n] "
    elif default == "yes" or default == "no":
        suffix = " [Y/n] " if default == "yes" else " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        print(question + suffix)
        reply = input().lower()
        if reply == '' and default is not None:
            return answers[default]
        if reply in answers:
            return answers[reply]
        print("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
def get_dataObject_modac_filesize(data_object_path):
    """Look up the size in bytes of a file stored on MoDaC.

    The value is taken from the "source_file_size" entry of the metadata
    returned by get_dataObject_modac_meta for the given path.

    Parameters
    ----------
    data_object_path : string
        The path of the file on MoDaC

    Returns
    ----------
    integer or None
        file size in bytes, or None when the metadata has no
        "source_file_size" entry
    """
    meta = get_dataObject_modac_meta(data_object_path)
    if "source_file_size" not in meta:
        return None
    return int(meta["source_file_size"])
def get_dataObject_modac_md5sum(data_object_path):
    """Look up the md5sum of a file stored on MoDaC.

    The value is taken from the "checksum" entry of the metadata returned
    by get_dataObject_modac_meta for the given path.

    Parameters
    ----------
    data_object_path : string
        The path of the file on MoDaC

    Returns
    ----------
    string or None
        The md5sum of the file, or None when the metadata has no
        "checksum" entry
    """
    meta = get_dataObject_modac_meta(data_object_path)
    if "checksum" not in meta:
        return None
    return meta["checksum"]
if __name__ == '__main__':
    # Command-line entry point: with -a/--authenticate, prompt for MoDaC
    # credentials and request a new token; without the flag do nothing.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-a', '--authenticate',
        action='store_true',
        help='Authenticate MoDaC user and create token',
    )
    if parser.parse_args().authenticate:
        authenticate_modac(generate_token=True)