In [1]:
from glob import glob
import os
import pandas as pd

In [2]:
ls clean-csv/


ABIDE_Initiative-clean.csv  BrainGenomicsSuperstructProject-clean.csv
ACPI-clean.csv              CORR-clean.csv
ADHD200-clean.csv           HypnosisBarrios-clean.csv
all.csv                     RocklandSample-clean.csv

In [3]:
csv_df = pd.concat([pd.read_csv(val, header=None, 
                                names=['T1url', 'id'], 
                                dtype=str) 
                    for val in sorted(glob('clean-csv/*clean.csv')) if not 
                    any([key in val.lower() for key in ['abide', 'brain'] ])])
print(csv_df.shape)
csv_df.to_csv('clean-csv/all.csv', index=False)
csv_df.head()


(4797, 2)
Out[3]:
T1url id
0 https://s3.amazonaws.com/fcp-indi/data/Project... 0028031
1 https://s3.amazonaws.com/fcp-indi/data/Project... 0028032
2 https://s3.amazonaws.com/fcp-indi/data/Project... 0028033
3 https://s3.amazonaws.com/fcp-indi/data/Project... 0028034
4 https://s3.amazonaws.com/fcp-indi/data/Project... 0028035

In [4]:
pd.read_csv('clean-csv/all.csv').T1url[[0, 1, 2]].values.tolist()


Out[4]:
['https://s3.amazonaws.com/fcp-indi/data/Projects/ACPI/RawData/uci_1/0028031/session_1/anat_1/anat.nii.gz',
 'https://s3.amazonaws.com/fcp-indi/data/Projects/ACPI/RawData/uci_1/0028032/session_1/anat_1/anat.nii.gz',
 'https://s3.amazonaws.com/fcp-indi/data/Projects/ACPI/RawData/uci_1/0028033/session_1/anat_1/anat.nii.gz']

In [5]:
def process_urls(indices):
    import os
    import urllib.request
    import shutil
    import pandas as pd
    from subprocess import run, PIPE
    from nipype.interfaces.afni import SkullStrip
    from nipype.interfaces.fsl import BET
    
    df = pd.read_csv('/om/user/satra/projects/metasearch/crawler/clean-csv/all.csv')
    if isinstance(indices, list):
        urls = df.T1url[indices].values.tolist()
    else:
        urls = [df.T1url[indices]]
    print(urls)
    with open('/om/user/satra/projects/metasearch/crawler/token', 'rt') as fp:
        token = fp.readlines()[0].strip()

    def upload(url, atlas_name, project_name, atlas_label, tmp_aseg, token, post_url=None):
        # upload
        payload = {'url': url,
               'atlasName': atlas_name,
               'atlasProject': project_name,
               'atlasLabelSet': atlas_label,
               'atlas': '@{}'.format(tmp_aseg),
               'token': token}
        if post_url is None:
            post_url = 'http://brainbox.pasteur.fr/mri/upload'
        payload['post_url'] = post_url
        curl_cmd = ('curl -F url={url} -F atlasName={atlasName} -F atlasProject={atlasProject} ' 
                    '-F atlasLabelSet={atlasLabelSet} -F atlas={atlas} -F token={token} ' 
                    '{post_url}').format(**payload)
        print(curl_cmd)
        cproc = run(curl_cmd, shell=True, check=True, stdout=PIPE, stderr=PIPE)
        print('stdout:', cproc.stdout.decode())
        print('stdout:', cproc.stderr.decode())
        return curl_cmd, cproc
    out = []
    for url in urls:
        print(url)
        project_name=url.split('/')[6]
        if project_name == 'INDI':
            project_name = 'HypnosisBarrios'
        if 't1.mgz' in url.lower():
            download_url = url.replace('T1.mgz', 'aseg.mgz')
            file_name = os.path.join(os.getcwd(), 'aseg.mgz')
        else:
            download_url = url
            file_name = os.path.join(os.getcwd(), url.split('/')[-1])
        # Download the file from `url` and save it locally under `file_name`:
        with urllib.request.urlopen(download_url) as response, open(file_name, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)

        tmp_aseg = 'aseg.mgz'
        atlas_name = 'aseg'
        atlas_label = 'freesurfer.json'
        if 'aseg.mgz' not in file_name:
            skullstrip = SkullStrip(in_file=file_name,
                                    args='-use_edge', 
                                    outputtype='NIFTI_GZ',
                                    out_file='aseg.nii.gz')
            tmp_aseg = 'aseg.nii.gz'
            #skullstrip = BET(in_file=file_name,
            #                 output_type='NIFTI_GZ',
            #                 mask=True)
            res = skullstrip.run()
            #tmp_aseg = res.outputs.mask_file
            import nibabel as nb
            import numpy as np
            img = nb.load(tmp_aseg)
            data = img.get_data()
            data = (data > 0).astype(np.uint8)
            img.set_data_dtype(np.uint8)
            nb.Nifti1Image(data, img.affine, img.header).to_filename('aseg.nii.gz')
            tmp_aseg = 'aseg.nii.gz'
            atlas_name = 'brainmask'
            atlas_label = 'cerebrum.json'
        curl_cmd, cproc = upload(url, atlas_name, project_name, atlas_label, tmp_aseg, token)
        out.append([curl_cmd, cproc.stdout, cproc.stderr])

        if 'aseg.mgz' in file_name:
            import nibabel as nb
            import numpy as np
            img = nb.load(file_name)
            data = img.get_data()
            data = (data > 0).astype(np.uint8)
            img.set_data_dtype(np.uint8)
            nb.Nifti1Image(data, img.affine, img.header).to_filename('aseg.nii.gz')
            tmp_aseg = 'aseg.nii.gz'
            atlas_name = 'brainmask'
            atlas_label = 'cerebrum.json'
            curl_cmd, cproc = upload(url, atlas_name, project_name, atlas_label, tmp_aseg, token)
            out.append([curl_cmd, cproc.stdout, cproc.stderr])
            
        if isinstance(indices, list):
            os.unlink(tmp_aseg)
            if os.path.exists(file_name):
                os.unlink(file_name)
    return out

In [8]:
from nipype import Workflow, Node, Function

wf = Workflow('process_url')
processor = Node(Function(input_names=['indices'],
                         output_names=['cmd'],
                         function=process_urls),
                name='process_urls')
processor.iterables = ('indices', [2481]) #range(csv_df.shape[0]))
wf.add_nodes([processor])
wf.base_dir = '/om/scratch/Mon/satra/'
wf.config['execution']['poll_sleep_duration'] = 10
wf.config['execution']['remove_unnecessary_outputs'] = False
if True:
    wf.run()
else:
    wf.run('SLURM', plugin_args={'sbatch_args': '-p om_interactive -N1 -c2 --mem=2G', 
                             'max_jobs':60})


161116-16:52:01,502 workflow INFO:
	 Workflow process_url settings: ['check', 'execution', 'logging']
161116-16:52:01,557 workflow INFO:
	 Running serially.
161116-16:52:01,559 workflow INFO:
	 Executing node process_urls.aI.a0 in dir: /om/scratch/Mon/satra/process_url/_indices_2481/process_urls
['https://s3.amazonaws.com/fcp-indi/data/Projects/CORR/RawData/JHNU/0025605/session_1/anat_1/anat.nii.gz']
https://s3.amazonaws.com/fcp-indi/data/Projects/CORR/RawData/JHNU/0025605/session_1/anat_1/anat.nii.gz
161116-16:52:03,300 interface WARNING:
	 AFNI is outdated, detected version AFNI_16.3.08 and AFNI_16.3.09 is available.
161116-16:52:03,548 interface WARNING:
	 AFNI is outdated, detected version AFNI_16.3.08 and AFNI_16.3.09 is available.
161116-16:53:44,891 interface INFO:
	 stderr 2016-11-16T16:53:44.890857:The intensity in the output dataset is a modified version
161116-16:53:44,893 interface INFO:
	 stderr 2016-11-16T16:53:44.890857:of the intensity in the input volume.
161116-16:53:44,895 interface INFO:
	 stderr 2016-11-16T16:53:44.890857:To obtain a masked version of the input with identical values inside
161116-16:53:44,897 interface INFO:
	 stderr 2016-11-16T16:53:44.890857:the brain, you can either use 3dSkullStrip's -orig_vol option
161116-16:53:44,899 interface INFO:
	 stderr 2016-11-16T16:53:44.890857:or run the following command:
161116-16:53:44,902 interface INFO:
	 stderr 2016-11-16T16:53:44.890857:  3dcalc -a /om/scratch/Mon/satra/process_url/_indices_2481/process_urls/anat.nii.gz -b ./aseg.nii.gz+orig -expr 'a*step(b)' \
161116-16:53:44,904 interface INFO:
	 stderr 2016-11-16T16:53:44.890857:         -prefix ./aseg.nii.gz_orig_vol
161116-16:53:44,905 interface INFO:
	 stderr 2016-11-16T16:53:44.890857:to generate a new masked version of the input.
curl -F url=https://s3.amazonaws.com/fcp-indi/data/Projects/CORR/RawData/JHNU/0025605/session_1/anat_1/anat.nii.gz -F atlasName=brainmask -F atlasProject=CORR -F atlasLabelSet=cerebrum.json -F atlas=@aseg.nii.gz -F token=4f4qluh341iz4cxrmnhr94hv2llhm2t9 http://brainbox.pasteur.fr/mri/upload
stdout: {"filename":"anat.nii.gz","source":"https://s3.amazonaws.com/fcp-indi/data/Projects/CORR/RawData/JHNU/0025605/session_1/anat_1/anat.nii.gz","url":"/data/f6f17ce90123b936b46ae1c98e82c3c8/","included":"2016-11-07T23:23:40.053Z","owner":"satra","mri":{"brain":"anat.nii.gz","atlas":[{"owner":"satra","created":"2016-11-07T23:23:40.053Z","modified":"2016-11-07T23:23:40.053Z","type":"volume","filename":"Atlas.nii.gz","labels":"foreground.json"},{"name":"brainmask","project":"CORR","access":"edit","created":"2016-11-16T21:53:31.546Z","modified":"2016-11-16T21:53:31.546Z","filename":"w9jlmmld7pzdj9k9.nii.gz","originalname":"aseg.nii.gz","labels":"cerebrum.json","owner":"satra","type":"volume"}]},"modified":"2016-11-07T23:23:40.053Z","modifiedBy":"satra","name":"0025605"}
stdout:   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  171k  100   772  100  171k    232  52792  0:00:03  0:00:03 --:--:-- 52809


In [14]:
pd.np.unique([val.split('/')[6] for val in csv_df.T1url])


Out[14]:
array(['ACPI', 'ADHD200', 'CORR', 'INDI', 'RocklandSample'], 
      dtype='<U14')

In [8]:
for idx, val in enumerate(csv_df.T1url):
    if 'T1.mgz' in val:
        print(idx, val)
        break


129 https://s3.amazonaws.com/fcp-indi/data/Projects/ADHD200/surfaces/freesurfer/5.3/0010001/mri/T1.mgz

In [ ]: