In [1]:
import sys
import pandas as pd
import numpy as np
import gzip

In [3]:
def summary(filename):
    """Print descriptive statistics for the 'x' and 'y' columns of a CSV file.

    The file is loaded as comma-separated text with pandas. For each of the
    two columns one line is printed with the results of ``np.mean``,
    ``np.std``, ``np.min`` and ``np.max``; a final line shows
    ``x.corr(y)`` using the pandas default correlation method.

    Parameters
    ----------
    filename : str
        Path of the comma-separated file to load. It is concatenated onto the
        header line, so it has to be a ``str``.

    Returns
    -------
    None
        The report is written to standard output only.
    """
    # read_csv defaults to a comma separator, matching read_table(sep=",").
    frame = pd.read_csv(filename)
    print("Summary: " + filename)

    # One (column, line template) pair per reported column; both columns get
    # the same four statistics in the same order.
    report_lines = (
        ("x", "X, Mean: {0:f}, standard deviation: {1:f}, Min:{2:f}, Max:{3:f}"),
        ("y", "Y, Mean: {0:f}, standard deviation: {1:f}, Min:{2:f}, Max:{3:f}"),
    )
    for column, template in report_lines:
        values = frame[column]
        stats = (np.mean(values), np.std(values), np.min(values), np.max(values))
        print(template.format(*stats))

    correlation = frame['x'].corr(frame['y'])
    print("Correlation coefficient:", correlation)


# Datasets to report on; summaries are printed in this order.
csv_files = [
    "data-1.csv",
    "data-2.csv",
    "data-3.csv",
    "data-4.csv",
    "data-5.csv",
    "data-6.csv",
]
for filename in csv_files:
    summary(filename)


Summary: data-1.csv
X, Mean: 8.717696, standard deviation: 12.094587, Min:-22.635878, Max:35.237055
Y, Mean: 12.522874, standard deviation: 6.606681, Min:-2.565088, Max:27.384153
Correlation coefficient: 0.950325695438
Summary: data-2.csv
X, Mean: -0.710787, standard deviation: 11.424031, Min:-33.271113, Max:27.647000
Y, Mean: -0.748987, standard deviation: 6.226550, Min:-18.621804, Max:14.147315
Correlation coefficient: 0.955153166575
Summary: data-3.csv
X, Mean: 0.968907, standard deviation: 2.231829, Min:-2.449064, Max:4.518885
Y, Mean: 8.508535, standard deviation: 1.147384, Min:6.641734, Max:10.551871
Correlation coefficient: 0.981937264676
Summary: data-4.csv
X, Mean: 49.905086, standard deviation: 28.387554, Min:0.000000, Max:87.376700
Y, Mean: 18.553837, standard deviation: 16.475638, Min:0.000000, Max:44.565040
Correlation coefficient: -0.0811930673549
Summary: data-5.csv
X, Mean: 49.909994, standard deviation: 28.378981, Min:4.337892, Max:91.052863
Y, Mean: 18.555525, standard deviation: 16.470559, Min:0.055498, Max:49.934197
Correlation coefficient: -0.0882359898306
Summary: data-6.csv
X, Mean: 7.216155, standard deviation: 11.861841, Min:-24.333821, Max:35.402139
Y, Mean: 7.602215, standard deviation: 11.705413, Min:-22.956357, Max:36.079874
Correlation coefficient: 0.935068566209