Import these first. I auto-import them in every notebook:
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
In [2]:
import os
In [3]:
# Fetch every GaiaSource CSV chunk with wget into ../data/GaiaSource/.
# File names follow GaiaSource_000-JJJ-III.csv.gz. This loop requests
# 256 files (III = 000..255) for each of JJJ = 000..019, and 111 files
# (III = 000..110) for the final block JJJ = 020.
# WARNING: running this cell starts the full download immediately.
base_url = 'http://cdn.gea.esac.esa.int/Gaia/gaia_source/csv/'
failed = []  # URLs whose wget command reported a non-zero status
for j in range(21):
    # The last block is shorter than the others.
    i_max = 111 if j == 20 else 256
    for i in range(i_max):
        fn = base_url + 'GaiaSource_000-{:03d}-{:03d}.csv.gz'.format(j, i)
        executable = 'wget --directory-prefix=../data/GaiaSource/ ' + fn
        print(executable)
        # os.system returns the command's status; anything non-zero means
        # the download did not succeed, so remember it for a later retry.
        if os.system(executable) != 0:
            failed.append(fn)
print('{} of the requested downloads failed'.format(len(failed)))
In [4]:
# Sanity check: show the tail end of the download directory listing.
! ls ../data/GaiaSource/ | tail
How many files are there?
In [5]:
# Number of files the download loop requests: 20 full blocks of 256 files
# plus a final block of 111 files (indices 0..110 inclusive, i.e. 111, not 110).
n_files = 20*256 + 111
n_files
Out[5]:
Each file is about 40 MB. How many GB total is the dataset?
In [6]:
# Rough dataset size: file count (20*256 + 111 = 5231, matching the download
# loop) times ~40 MB per file, converted from MB to GB.
total_gb = (20*256 + 111) * 40 / 1000
total_gb
Out[6]:
Lots of data. I'm queuing it to download on an external drive connected to GOPC.