In [1]:
from glob import glob
import os
import pandas as pd
In [2]:
ls clean-csv/
In [3]:
csv_df = pd.concat([pd.read_csv(val, header=None,
                                names=['T1url', 'id'],
                                dtype=str)
                    for val in sorted(glob('clean-csv/*clean.csv'))
                    if not any(key in val.lower() for key in ['abide', 'brain'])])
print(csv_df.shape)
csv_df.to_csv('clean-csv/all.csv', index=False)
csv_df.head()
Out[3]:
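Before uploading anything, it is worth sanity-checking the merged table. A hypothetical follow-up cell (not part of the original run) might look like this:
In [ ]:
# Hypothetical check: confirm the merged CSV has no missing or duplicated URLs
checked = pd.read_csv('clean-csv/all.csv', dtype=str)
print('rows:', len(checked))
print('missing T1 URLs:', checked.T1url.isnull().sum())
print('duplicated T1 URLs:', checked.T1url.duplicated().sum())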
In [4]:
pd.read_csv('clean-csv/all.csv').T1url[[0, 1, 2]].values.tolist()
Out[4]:
In [5]:
def process_urls(indices):
    import os
    import urllib.request
    import shutil
    import pandas as pd
    from subprocess import run, PIPE
    from nipype.interfaces.afni import SkullStrip
    from nipype.interfaces.fsl import BET

    df = pd.read_csv('/om/user/satra/projects/metasearch/crawler/clean-csv/all.csv')
    if isinstance(indices, list):
        urls = df.T1url[indices].values.tolist()
    else:
        urls = [df.T1url[indices]]
    print(urls)

    with open('/om/user/satra/projects/metasearch/crawler/token', 'rt') as fp:
        token = fp.readlines()[0].strip()

    def upload(url, atlas_name, project_name, atlas_label, tmp_aseg, token, post_url=None):
        # Upload the atlas/mask to BrainBox via a multipart curl POST
        payload = {'url': url,
                   'atlasName': atlas_name,
                   'atlasProject': project_name,
                   'atlasLabelSet': atlas_label,
                   'atlas': '@{}'.format(tmp_aseg),
                   'token': token}
        if post_url is None:
            post_url = 'http://brainbox.pasteur.fr/mri/upload'
        payload['post_url'] = post_url
        curl_cmd = ('curl -F url={url} -F atlasName={atlasName} -F atlasProject={atlasProject} '
                    '-F atlasLabelSet={atlasLabelSet} -F atlas={atlas} -F token={token} '
                    '{post_url}').format(**payload)
        print(curl_cmd)
        cproc = run(curl_cmd, shell=True, check=True, stdout=PIPE, stderr=PIPE)
        print('stdout:', cproc.stdout.decode())
        print('stderr:', cproc.stderr.decode())
        return curl_cmd, cproc

    out = []
    for url in urls:
        print(url)
        project_name = url.split('/')[6]
        if project_name == 'INDI':
            project_name = 'HypnosisBarrios'
        # For FreeSurfer T1s, fetch the matching aseg segmentation instead
        if 't1.mgz' in url.lower():
            download_url = url.replace('T1.mgz', 'aseg.mgz')
            file_name = os.path.join(os.getcwd(), 'aseg.mgz')
        else:
            download_url = url
            file_name = os.path.join(os.getcwd(), url.split('/')[-1])
        # Download the file from `url` and save it locally under `file_name`:
        with urllib.request.urlopen(download_url) as response, \
                open(file_name, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)
        tmp_aseg = 'aseg.mgz'
        atlas_name = 'aseg'
        atlas_label = 'freesurfer.json'
        if 'aseg.mgz' not in file_name:
            # No segmentation available: skull-strip the T1 to derive a brain mask
            skullstrip = SkullStrip(in_file=file_name,
                                    args='-use_edge',
                                    outputtype='NIFTI_GZ',
                                    out_file='aseg.nii.gz')
            tmp_aseg = 'aseg.nii.gz'
            #skullstrip = BET(in_file=file_name,
            #                 output_type='NIFTI_GZ',
            #                 mask=True)
            res = skullstrip.run()
            #tmp_aseg = res.outputs.mask_file
            import nibabel as nb
            import numpy as np
            # Binarize the skull-stripped image into a uint8 brain mask
            img = nb.load(tmp_aseg)
            data = np.asanyarray(img.dataobj)
            data = (data > 0).astype(np.uint8)
            img.set_data_dtype(np.uint8)
            nb.Nifti1Image(data, img.affine, img.header).to_filename('aseg.nii.gz')
            tmp_aseg = 'aseg.nii.gz'
            atlas_name = 'brainmask'
            atlas_label = 'cerebrum.json'
        curl_cmd, cproc = upload(url, atlas_name, project_name, atlas_label, tmp_aseg, token)
        out.append([curl_cmd, cproc.stdout, cproc.stderr])
        if 'aseg.mgz' in file_name:
            # Additionally binarize the aseg and upload it as a brain mask
            import nibabel as nb
            import numpy as np
            img = nb.load(file_name)
            data = np.asanyarray(img.dataobj)
            data = (data > 0).astype(np.uint8)
            img.set_data_dtype(np.uint8)
            nb.Nifti1Image(data, img.affine, img.header).to_filename('aseg.nii.gz')
            tmp_aseg = 'aseg.nii.gz'
            atlas_name = 'brainmask'
            atlas_label = 'cerebrum.json'
            curl_cmd, cproc = upload(url, atlas_name, project_name, atlas_label, tmp_aseg, token)
            out.append([curl_cmd, cproc.stdout, cproc.stderr])
        # Clean up downloaded and intermediate files when processing in batch mode
        if isinstance(indices, list):
            os.unlink(tmp_aseg)
            if os.path.exists(file_name):
                os.unlink(file_name)
    return out
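As an aside, the helper shells out to curl mainly for the -F atlas=@file multipart syntax. The same request could be made in-process; a minimal sketch using requests, assuming the BrainBox /mri/upload endpoint accepts the identical multipart field names:
In [ ]:
# Sketch only: in-process alternative to the curl call above, assuming
# brainbox.pasteur.fr accepts the same multipart fields via requests.
import requests

def upload_via_requests(url, atlas_name, project_name, atlas_label, tmp_aseg, token,
                        post_url='http://brainbox.pasteur.fr/mri/upload'):
    fields = {'url': url,
              'atlasName': atlas_name,
              'atlasProject': project_name,
              'atlasLabelSet': atlas_label,
              'token': token}
    with open(tmp_aseg, 'rb') as fp:
        # 'atlas' goes as a file part, mirroring curl's -F atlas=@file
        resp = requests.post(post_url, data=fields, files={'atlas': fp})
    resp.raise_for_status()
    return resp.text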
In [8]:
from nipype import Workflow, Node, Function
wf = Workflow('process_url')
processor = Node(Function(input_names=['indices'],
                          output_names=['cmd'],
                          function=process_urls),
                 name='process_urls')
processor.iterables = ('indices', [2481])  # use range(csv_df.shape[0]) to process all rows
wf.add_nodes([processor])
wf.base_dir = '/om/scratch/Mon/satra/'
wf.config['execution']['poll_sleep_duration'] = 10
wf.config['execution']['remove_unnecessary_outputs'] = False
if True:
    wf.run()
else:
    wf.run('SLURM', plugin_args={'sbatch_args': '-p om_interactive -N1 -c2 --mem=2G',
                                 'max_jobs': 60})
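Because the node's iterables expand into one run per index, the SLURM branch fans the same function out across the cluster (capped at 60 concurrent jobs above). For a quick smoke test outside nipype, process_urls can also be called directly with a single index, assuming the hard-coded /om/... paths are reachable:
In [ ]:
# Smoke test: process one row in the current directory, bypassing the workflow
out = process_urls(2481)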
In [14]:
import numpy as np
np.unique([val.split('/')[6] for val in csv_df.T1url])
Out[14]:
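The project name used during upload is the seventh slash-separated token of each URL (url.split('/')[6]). With a hypothetical URL, the indices fall out like this:
In [ ]:
# Hypothetical URL, illustrating which token index 6 picks out
url = 'http://files.example.org/data/crawler/mri/ProjectName/sub-01/T1.mgz'
print(url.split('/'))
# ['http:', '', 'files.example.org', 'data', 'crawler', 'mri', 'ProjectName', 'sub-01', 'T1.mgz']
print(url.split('/')[6])  # 'ProjectName'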
In [8]:
for idx, val in enumerate(csv_df.T1url):
    if 'T1.mgz' in val:
        print(idx, val)
        break
In [ ]: